본문 바로가기
dev/DB

[DB/ORACLE] 분석함수 및 활용(선형 보간, 선분이력)

by dev_Step 2022. 11. 28.
--NTILE() 으로 나눈 그룹
SELECT C1, COUNT(*) AS C2, SUM(SAL) AS C3 
FROM (SELECT SAL, NTILE(4) OVER (ORDER BY SAL) AS C1 FROM EMP WHERE DEPTNO = 30)
GROUP BY C1;

--CEIL() 으로 나눈 그룹
SELECT C1, COUNT(*) AS C2, SUM(SAL) AS C3 
FROM (SELECT SAL, CEIL( ROW_NUMBER() OVER (ORDER BY SAL, EMPNO) / (COUNT(*) OVER() / 4 )) AS C1 FROM EMP WHERE DEPTNO = 30)
GROUP BY C1;

--ㅡMOD() 으로 나눈 그룹
SELECT C1, COUNT(*) AS  C2, SUM(SAL) AS C3
FROM (SELECT SAL, MOD( ROW_NUMBER() OVER (ORDER BY SAL, EMPNO), 4 ) AS C1 FROM EMP WHERE DEPTNO = 30)
GROUP BY C1;

--RATIO_TO_REPORT(EXPR) : EXPR의 합계에 대한 현재 EXPR의 비율을 반환한다.
SELECT 
    SAL
    , RATIO_TO_REPORT(SAL) OVER () AS C1
    , SAL / SUM(SAL) OVER() AS C2
FROM 
    EMP
WHERE
    DEPTNO = 30;
    
-- 순차함수 
-- FIRST_VALUE : 윈도우 첫 행의 EXPR을 반환한다. IGNORE NULLS 키워드를 기술하면 NULL이 무시된다.
SELECT 
    JOB
    , HIREDATE
    , SAL
    , FIRST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE) AS C1
FROM 
    EMP A
WHERE DEPTNO = 30;

-- C2 의 경우 SAL = 950 일때 NULL 값이 반환되는데 이는, 자신의 이전 행에 널이 아닌 값이 존재하지 않기 떄문이다.
-- 따라서 모든 값을 채우려면 C3처럼 윈도우를 전체 파티션으로 지정해줘야 한다.
SELECT 
    SAL
    , COMM
    , FIRST_VALUE(COMM) OVER (ORDER BY SAL) AS C1
    , FIRST_VALUE(COMM) IGNORE NULLS OVER (ORDER BY SAL) AS C2
    , FIRST_VALUE(COMM) IGNORE NULLS OVER ( ORDER BY SAL RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS C3
FROM 
    EMP
WHERE DEPTNO = 30
ORDER BY 1,2;


--LAST_VALUE(EXPR) : 윈도우 끝 행의 EXPR를 반환한다.
-- C1은 SAL과 동일한 결과가 반환되는데 이는 윈도우가 RANGE UNBOUNDED PRECEDING으로 작동하기 떄문에
-- 원하는결과를 얻으려면 C2처럼 RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING  으로 윈도우 절을 기술해야 의도한 결과를 얻을 수 있다.
SELECT 
    JOB
    , HIREDATE
    , SAL
    , LAST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE) AS C1
    , LAST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ) AS C2
FROM
    EMP A
WHERE DEPTNO = 30
ORDER BY 1,2,3;

-- FIRST_VALUE() 정렬기준을 반대로 할경우 LAST_VALUE() 와 동일한 결과를 얻을 수 있다.
SELECT 
    JOB
    , HIREDATE
    , SAL
    , FIRST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE DESC) AS C1
    , LAST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS C2
FROM 
    EMP A
WHERE DEPTNO = 30
ORDER BY JOB, HIREDATE DESC;

-- NTH_VALUE(MEASURE_EXPR, N) [FROM {FIRST | LAST } ] [IGNORE NULLS] OVER () : 윈도우 N번째 행의 MEASURE_EXPR을 반환한다.
-- FIRST 는 윈도우의 첫행, LAST는 윈도우의 끝행부터 검색을 시작한다.
SELECT
    HIREDATE
    , SAL
    , NTH_VALUE(SAL, 1) OVER (ORDER BY HIREDATE) AS C1
    , NTH_VALUE(SAL, 3) OVER (ORDER BY HIREDATE) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY HIREDATE ;

SELECT
    HIREDATE
    , SAL
    , NTH_VALUE(SAL, 3) FROM LAST OVER (ORDER BY HIREDATE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS C1
    , NTH_VALUE(SAL, 3) OVER (ORDER BY HIREDATE DESC) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY 1;

--기타함수  LAG(), LEAD(), LISTAGG()
-- LAG( VALUE_EXPR , [,OFFSET [, DEFAULT]]) [IGNORE NULLS] OVER (~) : 현재 행에서 OFFSET 이전 행의 VALUE_EXPR을 반환한다. 
-- OFFSET은 행 기준이며 기본값은 1이다. DEFAULT 에 이전행이 없을 경우 반환값을 지정할 수 있다, DEFAULT 의 기본값은 NULL이다.
SELECT
    HIREDATE
    , SAL
    , LAG(SAL) OVER (ORDER BY HIREDATE) AS C1
    , LAG(SAL, 3) OVER (ORDER BY HIREDATE ) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY 1;

SELECT
    ENAME
    ,HIREDATE
    , COMM
    , LAG(COMM, 2, 999) IGNORE NULLS OVER (ORDER BY HIREDATE) AS C1
FROM EMP
WHERE DEPTNO = 30
ORDER BY 2;

SELECT
    SAL
    , COMM
    , NVL(COMM, LAG(COMM) IGNORE NULLS OVER (ORDER BY SAL, EMPNO)) AS COMM_N
FROM
    EMP
WHERE DEPTNO = 30
ORDER BY SAL, EMPNO;

SELECT 
    HIREDATE
    , SAL
    , SAL / LAG(SAL, 1, SAL) OVER (ORDER BY HIREDATE) AS C1
FROM EMP
WHERE DEPTNO = 30
ORDER BY HIREDATE;

--LEAD(VALUE_EXPR [, OFFSET [ , DEFAULT ]])  [IGNORE NULLS] OVER (~) : 현재행에서 OFFSET이후 행의 VALUE_EXPR 값을 반환한다.
SELECT 
    HIREDATE
    , SAL
    , LEAD(SAL) OVER (ORDER BY HIREDATE) AS C1
    , LEAD(SAL, 3) OVER (ORDER BY HIREDATE) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY HIREDATE;

--LEAD 함수는 LAG와 반대로 동작하므로 내림차순으로 정렬하면 LAG함수와 동일한 결과를 얻을수 있다
--분석함수의 정렬값이 고유하지 않으면 값이 바뀔수 있으므로 정렬값에 고유한 값을 넣도록 한다.
SELECT
    HIREDATE
    , SAL
    , LAG(SAL) OVER (ORDER BY HIREDATE DESC) AS C1
    , LEAD(SAL) OVER (ORDER BY HIREDATE) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY HIREDATE;

--LISTAGG( MEASURE_EXPR [, 'DELIMITER'] [ LISTAGG_OVERFLOW_CLAUSE] ) WITHIN GROUP (ORDER _BY_CLAUSE) [OVER QUERY_PARTITION_CLAUSE]
-- MEASURE_EXPR을 ORDER_BY_CLAUSE로 정렬한 후 DELIMITER로 구분하여 연결한 값을 반환한다. DELIMITER 의 기본값은 NULL이다.
SELECT
    JOB
    , ENAME
    , LISTAGG(ENAME, ',') WITHIN GROUP (ORDER BY ENAME) OVER ( PARTITION BY JOB ) AS C1
FROM EMP
WHERE DEPTNO = 30
ORDER BY JOB, ENAME;


--선분이력 전환
DROP TABLE T1 PURGE;
CREATE TABLE T1 (CD VARCHAR(1), DT DATE, VL NUMBER);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-01', 1);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-06', 2);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-16', 1);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-31', 3);
COMMIT;

SELECT 
    CD
    , DT AS BG_DT
    , LEAD( DT - 1, 1, DATE '9999-12-31') OVER (ORDER BY DT) AS ED_DT
    , VL
FROM T1
ORDER BY 1,2;

-- 전체 월말 데이터를 월별 이력으로 관리하는 경우가 있다. 월별 이력을 선분 이력으로 전환해보자
DROP TABLE T1 PURGE;
CREATE TABLE T1 ( CD VARCHAR(1), YM VARCHAR(6), VL NUMBER);

INSERT INTO T1 VALUES('A', '205001', 1);
INSERT INTO T1 VALUES('A', '205002', 1);
INSERT INTO T1 VALUES('A', '205003', 2);
INSERT INTO T1 VALUES('A', '205004', 2);
INSERT INTO T1 VALUES('A', '205005', 2);
INSERT INTO T1 VALUES('A', '205006', 1);
INSERT INTO T1 VALUES('A', '205007', 1);
INSERT INTO T1 VALUES('A', '205008', 1);
INSERT INTO T1 VALUES('A', '205009', 1);
INSERT INTO T1 VALUES('A', '205010', 3);
INSERT INTO T1 VALUES('A', '205011', 3);
INSERT INTO T1 VALUES('A', '205012', 3);
COMMIT;


SELECT 
    CD
    , MIN(YM) AS  BG_YM
    , CASE WHEN
        MAX(R1) = MAX(CN) THEN '999912'
        ELSE MAX(YM)
        END AS ED_YM
    , VL
FROM
    (SELECT
        A.*
        , COUNT(*) OVER (PARTITION BY A.CD) AS CN
        , ROW_NUMBER() OVER (PARTITION BY A.CD ORDER BY A.YM) AS R1
        , ROW_NUMBER() OVER (PARTITION BY A.CD, A.VL ORDER BY A.YM) AS R2
    FROM T1 A)
GROUP BY CD, VL, R1 - R2 -- R1 - R2  하므로써 (0, 2, 3, 9) 그룹이 생성됨
ORDER BY 1,2;

-- 선분이력 병합
DROP TABLE T1 PURGE;
CREATE TABLE T1 (CD VARCHAR2(1), BG NUMBER, ED NUMBER, YN VARCHAR2(1));

INSERT INTO T1 VALUES('A', 1, 2, 'Y');
INSERT INTO T1 VALUES('A', 2, 3, 'N');
INSERT INTO T1 VALUES('A', 3, 4, 'N');
INSERT INTO T1 VALUES('A', 4, 5, 'Y');
INSERT INTO T1 VALUES('A', 5, 6, 'Y');
INSERT INTO T1 VALUES('A', 6, 7, 'N');
INSERT INTO T1 VALUES('A', 7, 8, 'N');
INSERT INTO T1 VALUES('A', 8, 9, 'Y');
COMMIT;    

--YN이 'N'인 선분 이력을 병합하는 예제

SELECT
    A.* 
    , ROW_NUMBER() OVER (PARTITION BY CD ORDER BY BG) AS R1
    , ROW_NUMBER() OVER (PARTITION BY CD, YN ORDER BY BG) AS R2
FROM T1 A;

SELECT 
    CD
    ,MIN(BG) AS BG
    ,MAX(ED) AS ED
    , YN
FROM 
    (
        SELECT
            A.* 
            , ROW_NUMBER() OVER (PARTITION BY CD ORDER BY BG) AS R1
            , ROW_NUMBER() OVER (PARTITION BY CD, YN ORDER BY BG) AS R2
        FROM T1 A
    )
GROUP BY CD, YN,  CASE WHEN YN= 'N' THEN R1-R2 ELSE R1 END
ORDER BY 1,2;

-- 행패턴 검색 : 순번의 차를 이용하면 행의 패턴을 검색할 수 있다.
DROP TABLE T1 PURGE;
CREATE TABLE T1 ( CD VARCHAR(1), DT DATE, VL NUMBER);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-01', 100);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-02', 200);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-03', 300);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-04', 400);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-05', 500);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-06', 400);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-07', 500);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-08', 600);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-09', 700);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-10', 500);
COMMIT;

WITH W1 AS (
    SELECT 
        A.*
        , NVL( SIGN(VL - LAG(VL) OVER (PARTITION BY CD ORDER BY DT)), 0) AS DF
    FROM T1 A
)
SELECT
    A.*
    , ROW_NUMBER() OVER (PARTITION BY CD ORDER BY DT) AS R1
    , ROW_NUMBER() OVER (PARTITION BY CD, DF ORDER BY DT) AS R2
FROM W1 A
ORDER BY 1,2;


-- 3일 이상 상승한 구간을 조회

WITH W1 AS (
    SELECT 
        A.*
        , NVL( SIGN(VL - LAG(VL) OVER (PARTITION BY CD ORDER BY DT)), 0) AS DF
    FROM T1 A
)
SELECT 
    A.*
FROM
    (
        SELECT 
            A.* 
            , COUNT(*) OVER (PARTITION BY CD, RN) AS CN
        FROM
            (
                SELECT 
                    A.*
                    , ROW_NUMBER() OVER (PARTITION BY CD ORDER BY DT) - ROW_NUMBER() OVER (PARTITION BY CD, DF ORDER BY DT) AS RN 
                FROM
                    W1 A
            ) A
        WHERE DF = 1 
    ) A
WHERE
    CN >= 3;
    
-- MATCH_RECOGNIZE 절을 사용하면 간단하게 표현할 수 있다. 27장에서 다시 나올 내용
SELECT 
    CD
    , DT
    , VL
    , CN
FROM T1
MATCH_RECOGNIZE(
    PARTITION BY CD ORDER BY DT
    MEASURES FINAL COUNT(*) AS CN
    ALL ROWS PER MATCH
    PATTERN ( UP{3,})
    DEFINE UP AS UP.VL > PREV(UP.VL)
)
ORDER BY CD, DT;

--선형 보간 : 두 지점 사이의 값을 두지점의 직선 거리에 따라 선형적으로 결정하는 방법
-- 2050-01-02 ~ 2050-01-03 구간과 2050-01-05 ~ 2050-01-07 구간의 데이터가 존재하지 않는다.
DROP TABLE T1 PURGE;
CREATE TABLE T1 (CD VARCHAR(1), DT DATE, VL NUMBER);

INSERT INTO T1 VALUES('A', DATE '2050-01-01', 100);
INSERT INTO T1 VALUES('A', DATE '2050-01-04', 400);
INSERT INTO T1 VALUES('A', DATE '2050-01-08', 800);
COMMIT;

-- 행복제 기법을 사용하여 데이터가 존재하지 않는 구간의 행을 생성하고 LEAD함수를 사용하여 보간값을 계산한다
SELECT 
    A.DT + B.LV -1 AS DT
    , ROUND( A.VL + (A.VL_DF / A.DN) * (B.LV -1), 2) AS VL
FROM
(
    SELECT 
        A.*
        , NVL(LEAD (A.VL) OVER (ORDER BY A.DT) - A.VL, 0) AS VL_DF
        , NVL(LEAD (A.DT) OVER (ORDER BY A.DT) - A.DT, 1) AS DN
    FROM T1 A
) A,
(
    SELECT LEVEL AS LV FROM DUAL CONNECT BY LEVEL <= 10
) B
WHERE B.LV <= A.DN
ORDER BY 1;

'dev > DB' 카테고리의 다른 글

[DB/ORACLE] 계층쿼리  (0) 2022.11.30
[DB/ORACLE] TOP-N QUERY  (0) 2022.11.29
[DB/ORACLE] 분석함수  (0) 2022.11.25
[DB/Mysql] HAVING, GROUP BY  (0) 2022.07.05
[DB/MySQL] 기타함수(Group_concat)  (0) 2022.07.05