--NTILE() 으로 나눈 그룹
SELECT C1, COUNT(*) AS C2, SUM(SAL) AS C3
FROM (SELECT SAL, NTILE(4) OVER (ORDER BY SAL) AS C1 FROM EMP WHERE DEPTNO = 30)
GROUP BY C1;
--CEIL() 으로 나눈 그룹
SELECT C1, COUNT(*) AS C2, SUM(SAL) AS C3
FROM (SELECT SAL, CEIL( ROW_NUMBER() OVER (ORDER BY SAL, EMPNO) / (COUNT(*) OVER() / 4 )) AS C1 FROM EMP WHERE DEPTNO = 30)
GROUP BY C1;
--ㅡMOD() 으로 나눈 그룹
SELECT C1, COUNT(*) AS C2, SUM(SAL) AS C3
FROM (SELECT SAL, MOD( ROW_NUMBER() OVER (ORDER BY SAL, EMPNO), 4 ) AS C1 FROM EMP WHERE DEPTNO = 30)
GROUP BY C1;
--RATIO_TO_REPORT(EXPR) : EXPR의 합계에 대한 현재 EXPR의 비율을 반환한다.
SELECT
SAL
, RATIO_TO_REPORT(SAL) OVER () AS C1
, SAL / SUM(SAL) OVER() AS C2
FROM
EMP
WHERE
DEPTNO = 30;
-- 순차함수
-- FIRST_VALUE : 윈도우 첫 행의 EXPR을 반환한다. IGNORE NULLS 키워드를 기술하면 NULL이 무시된다.
SELECT
JOB
, HIREDATE
, SAL
, FIRST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE) AS C1
FROM
EMP A
WHERE DEPTNO = 30;
-- C2 의 경우 SAL = 950 일때 NULL 값이 반환되는데 이는, 자신의 이전 행에 널이 아닌 값이 존재하지 않기 떄문이다.
-- 따라서 모든 값을 채우려면 C3처럼 윈도우를 전체 파티션으로 지정해줘야 한다.
SELECT
SAL
, COMM
, FIRST_VALUE(COMM) OVER (ORDER BY SAL) AS C1
, FIRST_VALUE(COMM) IGNORE NULLS OVER (ORDER BY SAL) AS C2
, FIRST_VALUE(COMM) IGNORE NULLS OVER ( ORDER BY SAL RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS C3
FROM
EMP
WHERE DEPTNO = 30
ORDER BY 1,2;
--LAST_VALUE(EXPR) : 윈도우 끝 행의 EXPR를 반환한다.
-- C1은 SAL과 동일한 결과가 반환되는데 이는 윈도우가 RANGE UNBOUNDED PRECEDING으로 작동하기 떄문에
-- 원하는결과를 얻으려면 C2처럼 RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING 으로 윈도우 절을 기술해야 의도한 결과를 얻을 수 있다.
SELECT
JOB
, HIREDATE
, SAL
, LAST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE) AS C1
, LAST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ) AS C2
FROM
EMP A
WHERE DEPTNO = 30
ORDER BY 1,2,3;
-- FIRST_VALUE() 정렬기준을 반대로 할경우 LAST_VALUE() 와 동일한 결과를 얻을 수 있다.
SELECT
JOB
, HIREDATE
, SAL
, FIRST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE DESC) AS C1
, LAST_VALUE(SAL) OVER (PARTITION BY JOB ORDER BY HIREDATE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS C2
FROM
EMP A
WHERE DEPTNO = 30
ORDER BY JOB, HIREDATE DESC;
-- NTH_VALUE(MEASURE_EXPR, N) [FROM {FIRST | LAST } ] [IGNORE NULLS] OVER () : 윈도우 N번째 행의 MEASURE_EXPR을 반환한다.
-- FIRST 는 윈도우의 첫행, LAST는 윈도우의 끝행부터 검색을 시작한다.
SELECT
HIREDATE
, SAL
, NTH_VALUE(SAL, 1) OVER (ORDER BY HIREDATE) AS C1
, NTH_VALUE(SAL, 3) OVER (ORDER BY HIREDATE) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY HIREDATE ;
SELECT
HIREDATE
, SAL
, NTH_VALUE(SAL, 3) FROM LAST OVER (ORDER BY HIREDATE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS C1
, NTH_VALUE(SAL, 3) OVER (ORDER BY HIREDATE DESC) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY 1;
--기타함수 LAG(), LEAD(), LISTAGG()
-- LAG( VALUE_EXPR , [,OFFSET [, DEFAULT]]) [IGNORE NULLS] OVER (~) : 현재 행에서 OFFSET 이전 행의 VALUE_EXPR을 반환한다.
-- OFFSET은 행 기준이며 기본값은 1이다. DEFAULT 에 이전행이 없을 경우 반환값을 지정할 수 있다, DEFAULT 의 기본값은 NULL이다.
SELECT
HIREDATE
, SAL
, LAG(SAL) OVER (ORDER BY HIREDATE) AS C1
, LAG(SAL, 3) OVER (ORDER BY HIREDATE ) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY 1;
SELECT
ENAME
,HIREDATE
, COMM
, LAG(COMM, 2, 999) IGNORE NULLS OVER (ORDER BY HIREDATE) AS C1
FROM EMP
WHERE DEPTNO = 30
ORDER BY 2;
SELECT
SAL
, COMM
, NVL(COMM, LAG(COMM) IGNORE NULLS OVER (ORDER BY SAL, EMPNO)) AS COMM_N
FROM
EMP
WHERE DEPTNO = 30
ORDER BY SAL, EMPNO;
SELECT
HIREDATE
, SAL
, SAL / LAG(SAL, 1, SAL) OVER (ORDER BY HIREDATE) AS C1
FROM EMP
WHERE DEPTNO = 30
ORDER BY HIREDATE;
--LEAD(VALUE_EXPR [, OFFSET [ , DEFAULT ]]) [IGNORE NULLS] OVER (~) : 현재행에서 OFFSET이후 행의 VALUE_EXPR 값을 반환한다.
SELECT
HIREDATE
, SAL
, LEAD(SAL) OVER (ORDER BY HIREDATE) AS C1
, LEAD(SAL, 3) OVER (ORDER BY HIREDATE) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY HIREDATE;
--LEAD 함수는 LAG와 반대로 동작하므로 내림차순으로 정렬하면 LAG함수와 동일한 결과를 얻을수 있다
--분석함수의 정렬값이 고유하지 않으면 값이 바뀔수 있으므로 정렬값에 고유한 값을 넣도록 한다.
SELECT
HIREDATE
, SAL
, LAG(SAL) OVER (ORDER BY HIREDATE DESC) AS C1
, LEAD(SAL) OVER (ORDER BY HIREDATE) AS C2
FROM EMP
WHERE DEPTNO = 30
ORDER BY HIREDATE;
--LISTAGG( MEASURE_EXPR [, 'DELIMITER'] [ LISTAGG_OVERFLOW_CLAUSE] ) WITHIN GROUP (ORDER _BY_CLAUSE) [OVER QUERY_PARTITION_CLAUSE]
-- MEASURE_EXPR을 ORDER_BY_CLAUSE로 정렬한 후 DELIMITER로 구분하여 연결한 값을 반환한다. DELIMITER 의 기본값은 NULL이다.
SELECT
JOB
, ENAME
, LISTAGG(ENAME, ',') WITHIN GROUP (ORDER BY ENAME) OVER ( PARTITION BY JOB ) AS C1
FROM EMP
WHERE DEPTNO = 30
ORDER BY JOB, ENAME;
--선분이력 전환
DROP TABLE T1 PURGE;
CREATE TABLE T1 (CD VARCHAR(1), DT DATE, VL NUMBER);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-01', 1);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-06', 2);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-16', 1);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-31', 3);
COMMIT;
SELECT
CD
, DT AS BG_DT
, LEAD( DT - 1, 1, DATE '9999-12-31') OVER (ORDER BY DT) AS ED_DT
, VL
FROM T1
ORDER BY 1,2;
-- 전체 월말 데이터를 월별 이력으로 관리하는 경우가 있다. 월별 이력을 선분 이력으로 전환해보자
DROP TABLE T1 PURGE;
CREATE TABLE T1 ( CD VARCHAR(1), YM VARCHAR(6), VL NUMBER);
INSERT INTO T1 VALUES('A', '205001', 1);
INSERT INTO T1 VALUES('A', '205002', 1);
INSERT INTO T1 VALUES('A', '205003', 2);
INSERT INTO T1 VALUES('A', '205004', 2);
INSERT INTO T1 VALUES('A', '205005', 2);
INSERT INTO T1 VALUES('A', '205006', 1);
INSERT INTO T1 VALUES('A', '205007', 1);
INSERT INTO T1 VALUES('A', '205008', 1);
INSERT INTO T1 VALUES('A', '205009', 1);
INSERT INTO T1 VALUES('A', '205010', 3);
INSERT INTO T1 VALUES('A', '205011', 3);
INSERT INTO T1 VALUES('A', '205012', 3);
COMMIT;
SELECT
CD
, MIN(YM) AS BG_YM
, CASE WHEN
MAX(R1) = MAX(CN) THEN '999912'
ELSE MAX(YM)
END AS ED_YM
, VL
FROM
(SELECT
A.*
, COUNT(*) OVER (PARTITION BY A.CD) AS CN
, ROW_NUMBER() OVER (PARTITION BY A.CD ORDER BY A.YM) AS R1
, ROW_NUMBER() OVER (PARTITION BY A.CD, A.VL ORDER BY A.YM) AS R2
FROM T1 A)
GROUP BY CD, VL, R1 - R2 -- R1 - R2 하므로써 (0, 2, 3, 9) 그룹이 생성됨
ORDER BY 1,2;
-- 선분이력 병합
DROP TABLE T1 PURGE;
CREATE TABLE T1 (CD VARCHAR2(1), BG NUMBER, ED NUMBER, YN VARCHAR2(1));
INSERT INTO T1 VALUES('A', 1, 2, 'Y');
INSERT INTO T1 VALUES('A', 2, 3, 'N');
INSERT INTO T1 VALUES('A', 3, 4, 'N');
INSERT INTO T1 VALUES('A', 4, 5, 'Y');
INSERT INTO T1 VALUES('A', 5, 6, 'Y');
INSERT INTO T1 VALUES('A', 6, 7, 'N');
INSERT INTO T1 VALUES('A', 7, 8, 'N');
INSERT INTO T1 VALUES('A', 8, 9, 'Y');
COMMIT;
--YN이 'N'인 선분 이력을 병합하는 예제
SELECT
A.*
, ROW_NUMBER() OVER (PARTITION BY CD ORDER BY BG) AS R1
, ROW_NUMBER() OVER (PARTITION BY CD, YN ORDER BY BG) AS R2
FROM T1 A;
SELECT
CD
,MIN(BG) AS BG
,MAX(ED) AS ED
, YN
FROM
(
SELECT
A.*
, ROW_NUMBER() OVER (PARTITION BY CD ORDER BY BG) AS R1
, ROW_NUMBER() OVER (PARTITION BY CD, YN ORDER BY BG) AS R2
FROM T1 A
)
GROUP BY CD, YN, CASE WHEN YN= 'N' THEN R1-R2 ELSE R1 END
ORDER BY 1,2;
-- 행패턴 검색 : 순번의 차를 이용하면 행의 패턴을 검색할 수 있다.
DROP TABLE T1 PURGE;
CREATE TABLE T1 ( CD VARCHAR(1), DT DATE, VL NUMBER);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-01', 100);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-02', 200);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-03', 300);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-04', 400);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-05', 500);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-06', 400);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-07', 500);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-08', 600);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-09', 700);
INSERT INTO T1 VALUES( 'A', DATE '2050-01-10', 500);
COMMIT;
WITH W1 AS (
SELECT
A.*
, NVL( SIGN(VL - LAG(VL) OVER (PARTITION BY CD ORDER BY DT)), 0) AS DF
FROM T1 A
)
SELECT
A.*
, ROW_NUMBER() OVER (PARTITION BY CD ORDER BY DT) AS R1
, ROW_NUMBER() OVER (PARTITION BY CD, DF ORDER BY DT) AS R2
FROM W1 A
ORDER BY 1,2;
-- 3일 이상 상승한 구간을 조회
WITH W1 AS (
SELECT
A.*
, NVL( SIGN(VL - LAG(VL) OVER (PARTITION BY CD ORDER BY DT)), 0) AS DF
FROM T1 A
)
SELECT
A.*
FROM
(
SELECT
A.*
, COUNT(*) OVER (PARTITION BY CD, RN) AS CN
FROM
(
SELECT
A.*
, ROW_NUMBER() OVER (PARTITION BY CD ORDER BY DT) - ROW_NUMBER() OVER (PARTITION BY CD, DF ORDER BY DT) AS RN
FROM
W1 A
) A
WHERE DF = 1
) A
WHERE
CN >= 3;
-- MATCH_RECOGNIZE 절을 사용하면 간단하게 표현할 수 있다. 27장에서 다시 나올 내용
SELECT
CD
, DT
, VL
, CN
FROM T1
MATCH_RECOGNIZE(
PARTITION BY CD ORDER BY DT
MEASURES FINAL COUNT(*) AS CN
ALL ROWS PER MATCH
PATTERN ( UP{3,})
DEFINE UP AS UP.VL > PREV(UP.VL)
)
ORDER BY CD, DT;
--선형 보간 : 두 지점 사이의 값을 두지점의 직선 거리에 따라 선형적으로 결정하는 방법
-- 2050-01-02 ~ 2050-01-03 구간과 2050-01-05 ~ 2050-01-07 구간의 데이터가 존재하지 않는다.
DROP TABLE T1 PURGE;
CREATE TABLE T1 (CD VARCHAR(1), DT DATE, VL NUMBER);
INSERT INTO T1 VALUES('A', DATE '2050-01-01', 100);
INSERT INTO T1 VALUES('A', DATE '2050-01-04', 400);
INSERT INTO T1 VALUES('A', DATE '2050-01-08', 800);
COMMIT;
-- 행복제 기법을 사용하여 데이터가 존재하지 않는 구간의 행을 생성하고 LEAD함수를 사용하여 보간값을 계산한다
SELECT
A.DT + B.LV -1 AS DT
, ROUND( A.VL + (A.VL_DF / A.DN) * (B.LV -1), 2) AS VL
FROM
(
SELECT
A.*
, NVL(LEAD (A.VL) OVER (ORDER BY A.DT) - A.VL, 0) AS VL_DF
, NVL(LEAD (A.DT) OVER (ORDER BY A.DT) - A.DT, 1) AS DN
FROM T1 A
) A,
(
SELECT LEVEL AS LV FROM DUAL CONNECT BY LEVEL <= 10
) B
WHERE B.LV <= A.DN
ORDER BY 1;