🪟 Window functions essentielles — SQL

Format : SQL · 25 patterns · PostgreSQL + MySQL 8 + BigQuery + Snowflake Auteur : Équipe pédagogique ITAG · Data Pack Mise à jour : 2026

🎯 Description

Pack de 25 window functions prêtes à coller, avec données de test, résultat attendu et explication. Couvre LAG, LEAD, ROW_NUMBER, RANK, NTILE, FIRST_VALUE, LAST_VALUE, et patterns analytics avancés.

📋 Patterns inclus

Ranking & numbering (5)

ROW_NUMBER() — déduplication
RANK() vs DENSE_RANK() — gestion des ties
NTILE(n) — quantiles (quartiles, déciles)
PERCENT_RANK() — percentiles
Top N par groupe (PARTITION BY)

Lag / lead (5)

LAG() — comparaison row précédente (growth rate)
LEAD() — comparaison row suivante (next value)
Différence consécutive (sales day-over-day)
Sessions calcul (gap > 30 min)
Détection de séquences

Aggregations (8)

SUM() OVER — running total
Moving average 7 / 30 jours
Cumulative percentage
Rolling max / min
COUNT() OVER — count distinct par fenêtre
STDDEV() OVER — volatility
FIRST_VALUE / LAST_VALUE
NTH_VALUE

Analytics avancés (7)

Cohort retention analysis
Funnel conversion par étape
RFM segmentation (Recency, Frequency, Monetary)
Customer lifetime value (CLV)
Churn detection
ABC analysis (Pareto)
Time-weighted average

💼 Cas d'usage

Préparation interview Data Analyst / Scientist
Examen Snowflake SnowPro / BigQuery
Migrations Excel → SQL (analystes business)
Tutoring SQL avancé

💾 25 patterns Window Functions

Données de test

-- Table unique pour tous les 25 patterns
CREATE TABLE sales (
    id          SERIAL PRIMARY KEY,
    sale_date   DATE         NOT NULL,
    product_id  VARCHAR(20)  NOT NULL,
    category    VARCHAR(50)  NOT NULL,
    amount      NUMERIC(10,2) NOT NULL,
    customer_id VARCHAR(20)  NOT NULL
);

INSERT INTO sales (sale_date, product_id, category, amount, customer_id) VALUES
('2024-01-01', 'P01', 'Formation',    299.00, 'C01'),
('2024-01-01', 'P02', 'Formation',    249.00, 'C02'),
('2024-01-02', 'P03', 'Certification',499.00, 'C03'),
('2024-01-02', 'P01', 'Formation',    299.00, 'C04'),
('2024-01-03', 'P04', 'Abonnement',   199.00, 'C01'),
('2024-01-03', 'P02', 'Formation',    249.00, 'C05'),
('2024-01-04', 'P03', 'Certification',499.00, 'C06'),
('2024-01-04', 'P05', 'Formation',    149.00, 'C02'),
('2024-01-05', 'P01', 'Formation',    299.00, 'C07'),
('2024-01-05', 'P04', 'Abonnement',   199.00, 'C08'),
('2024-01-06', 'P06', 'Certification',399.00, 'C03'),
('2024-01-07', 'P02', 'Formation',    249.00, 'C09'),
('2024-01-08', 'P03', 'Certification',499.00, 'C10'),
('2024-01-09', 'P01', 'Formation',    299.00, 'C01'),
('2024-01-10', 'P04', 'Abonnement',   199.00, 'C05'),
('2024-01-11', 'P07', 'Pack',         799.00, 'C04'),
('2024-01-12', 'P02', 'Formation',    249.00, 'C06'),
('2024-01-13', 'P06', 'Certification',399.00, 'C07'),
('2024-01-14', 'P01', 'Formation',    299.00, 'C08'),
('2024-01-15', 'P03', 'Certification',499.00, 'C02');

Bloc 1 — Ranking (patterns 1-5)

-- Pattern 1 : ROW_NUMBER — déduplication (garde la vente la plus récente par client)
SELECT *
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY customer_id
            ORDER BY sale_date DESC
        ) AS rn
    FROM sales
) t
WHERE rn = 1;
-- Résultat : 1 ligne par customer_id, la plus récente

-- Pattern 2 : RANK vs DENSE_RANK — gestion des ex-aequo
SELECT
    product_id,
    category,
    SUM(amount)                                       AS total_revenue,
    RANK()       OVER (ORDER BY SUM(amount) DESC)     AS rank_with_gaps,
    DENSE_RANK() OVER (ORDER BY SUM(amount) DESC)     AS rank_no_gaps
FROM sales
GROUP BY product_id, category
ORDER BY total_revenue DESC;
-- RANK : 1,2,2,4 (saute le 3 après un ex-aequo)
-- DENSE_RANK : 1,2,2,3 (pas de saut)

-- Pattern 3 : NTILE — découpage en quartiles
SELECT
    customer_id,
    SUM(amount)                         AS customer_total,
    NTILE(4) OVER (
        ORDER BY SUM(amount) DESC
    )                                   AS quartile,   -- Q1=top 25%
    NTILE(10) OVER (
        ORDER BY SUM(amount) DESC
    )                                   AS decile
FROM sales
GROUP BY customer_id
ORDER BY customer_total DESC;

-- Pattern 4 : PERCENT_RANK — position en percentile (0.0 à 1.0)
SELECT
    sale_date,
    amount,
    ROUND(
        PERCENT_RANK() OVER (ORDER BY amount) * 100, 1
    )                                    AS percentile_rank
FROM sales
ORDER BY amount DESC;
-- Interprétation : 95.0 = cette vente est dans le top 5%

-- Pattern 5 : Top N par groupe (PARTITION BY)
SELECT *
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY category
            ORDER BY amount DESC
        ) AS rank_in_category
    FROM sales
) t
WHERE rank_in_category <= 3;
-- Résultat : Top 3 des ventes par catégorie

Bloc 2 — Running totals (patterns 6-10)

-- Pattern 6 : Cumulative SUM — running total par date
SELECT
    sale_date,
    amount,
    SUM(amount) OVER (
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )                     AS running_total,
    SUM(amount) OVER ()   AS grand_total
FROM sales
ORDER BY sale_date;

-- Pattern 7 : Running average — moyenne cumulée
SELECT
    sale_date,
    amount,
    ROUND(
        AVG(amount) OVER (
            ORDER BY sale_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ), 2
    )                       AS running_avg
FROM sales
ORDER BY sale_date;

-- Pattern 8 : Rolling MAX et MIN (sur les 7 dernières lignes)
SELECT
    sale_date,
    amount,
    MAX(amount) OVER (
        ORDER BY sale_date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    )           AS rolling_max_7,
    MIN(amount) OVER (
        ORDER BY sale_date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    )           AS rolling_min_7
FROM sales
ORDER BY sale_date;

-- Pattern 9 : YTD (Year-to-Date) par catégorie
SELECT
    sale_date,
    category,
    amount,
    SUM(amount) OVER (
        PARTITION BY category, EXTRACT(YEAR FROM sale_date)
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )           AS ytd_by_category
FROM sales
ORDER BY category, sale_date;

-- Pattern 10 : Cumulative percentage (part du total cumulée)
SELECT
    sale_date,
    amount,
    SUM(amount) OVER (ORDER BY sale_date)   AS running_total,
    SUM(amount) OVER ()                     AS grand_total,
    ROUND(
        SUM(amount) OVER (ORDER BY sale_date) /
        SUM(amount) OVER () * 100, 1
    )                                       AS cumulative_pct
FROM sales
ORDER BY sale_date;

Bloc 3 — Lead / Lag (patterns 11-15)

-- Pattern 11 : LAG — variation J-1 (day-over-day)
WITH daily AS (
    SELECT sale_date, SUM(amount) AS daily_revenue
    FROM sales
    GROUP BY sale_date
)
SELECT
    sale_date,
    daily_revenue,
    LAG(daily_revenue, 1) OVER (ORDER BY sale_date)  AS prev_day_revenue,
    ROUND(
        (daily_revenue - LAG(daily_revenue, 1) OVER (ORDER BY sale_date))
        / NULLIF(LAG(daily_revenue, 1) OVER (ORDER BY sale_date), 0) * 100, 1
    )                                                  AS dod_pct_change
FROM daily
ORDER BY sale_date;

-- Pattern 12 : LEAD — voir la prochaine valeur (next day preview)
WITH daily AS (
    SELECT sale_date, SUM(amount) AS daily_revenue
    FROM sales
    GROUP BY sale_date
)
SELECT
    sale_date,
    daily_revenue,
    LEAD(daily_revenue, 1) OVER (ORDER BY sale_date)  AS next_day_revenue,
    LEAD(sale_date,     1) OVER (ORDER BY sale_date)  AS next_day
FROM daily
ORDER BY sale_date;

-- Pattern 13 : MoM% (Month-over-Month) avec LAG décalage de N jours
WITH monthly AS (
    SELECT
        DATE_TRUNC('month', sale_date)  AS month,
        SUM(amount)                     AS monthly_revenue
    FROM sales
    GROUP BY DATE_TRUNC('month', sale_date)
)
SELECT
    month,
    monthly_revenue,
    LAG(monthly_revenue) OVER (ORDER BY month)   AS prev_month_revenue,
    ROUND(
        (monthly_revenue - LAG(monthly_revenue) OVER (ORDER BY month))
        / NULLIF(LAG(monthly_revenue) OVER (ORDER BY month), 0) * 100, 1
    )                                             AS mom_pct
FROM monthly
ORDER BY month;

-- Pattern 14 : Moving average 7 jours (sur les 6 précédents + courant)
WITH daily AS (
    SELECT sale_date, SUM(amount) AS daily_revenue
    FROM sales
    GROUP BY sale_date
)
SELECT
    sale_date,
    daily_revenue,
    ROUND(
        AVG(daily_revenue) OVER (
            ORDER BY sale_date
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ), 2
    )                                   AS ma_7d,
    ROUND(
        AVG(daily_revenue) OVER (
            ORDER BY sale_date
            ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
        ), 2
    )                                   AS ma_30d
FROM daily
ORDER BY sale_date;

-- Pattern 15 : Comparaison avec la ligne précédente (même produit)
SELECT
    sale_date,
    product_id,
    amount,
    LAG(amount) OVER (
        PARTITION BY product_id
        ORDER BY sale_date
    )                           AS prev_sale_same_product,
    amount - LAG(amount) OVER (
        PARTITION BY product_id
        ORDER BY sale_date
    )                           AS diff_vs_prev
FROM sales
ORDER BY product_id, sale_date;

Bloc 4 — Frames (patterns 16-20)

-- Pattern 16 : Moving average pondérée (weighted avg sur 7j — poids décroissants)
WITH daily AS (
    SELECT sale_date, SUM(amount) AS rev
    FROM sales GROUP BY sale_date
),
weights AS (
    SELECT
        sale_date, rev,
        ROW_NUMBER() OVER (ORDER BY sale_date) AS rn
    FROM daily
)
SELECT
    sale_date,
    rev,
    -- Approximation : on utilise un CASE pour des poids explicites
    ROUND(
        AVG(rev) OVER (
            ORDER BY sale_date
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ), 2
    ) AS simple_ma_7d
    -- Pour un vrai WMA, voir Pattern avancé 25
FROM daily
ORDER BY sale_date;

-- Pattern 17 : FIRST_VALUE et LAST_VALUE dans une fenêtre
SELECT
    sale_date,
    category,
    amount,
    FIRST_VALUE(amount) OVER (
        PARTITION BY category
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    )                       AS first_sale_in_category,
    LAST_VALUE(amount)  OVER (
        PARTITION BY category
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    )                       AS last_sale_in_category
FROM sales
ORDER BY category, sale_date;

-- Pattern 18 : NTH_VALUE — la 2e valeur dans chaque groupe
SELECT
    sale_date,
    category,
    amount,
    NTH_VALUE(amount, 2) OVER (
        PARTITION BY category
        ORDER BY amount DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    )                       AS second_highest_in_category
FROM sales
ORDER BY category, amount DESC;

-- Pattern 19 : Médiane glissante (approximation avec PERCENTILE_CONT)
-- Note : PERCENTILE_CONT est une ordered-set aggregate, pas une window function pure.
-- On l'utilise en sous-requête agrégée ici.
SELECT
    category,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY amount) AS median_amount,
    PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY amount) AS p25,
    PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY amount) AS p75
FROM sales
GROUP BY category;

-- Pattern 20 : Écart-type glissant (volatilité des ventes)
WITH daily AS (
    SELECT sale_date, SUM(amount) AS rev
    FROM sales GROUP BY sale_date
)
SELECT
    sale_date,
    rev,
    ROUND(
        STDDEV(rev) OVER (
            ORDER BY sale_date
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ), 2
    )                       AS stddev_7d,
    ROUND(
        rev / NULLIF(
            STDDEV(rev) OVER (
                ORDER BY sale_date
                ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
            ), 0
        ), 2
    )                       AS zscore_30d   -- Détection d'anomalies
FROM daily
ORDER BY sale_date;

Bloc 5 — Advanced (patterns 21-25)

-- Pattern 21 : Gap detection (périodes sans ventes > N jours)
WITH daily AS (
    SELECT DISTINCT sale_date FROM sales ORDER BY sale_date
),
with_gaps AS (
    SELECT
        sale_date,
        LAG(sale_date) OVER (ORDER BY sale_date) AS prev_date,
        sale_date - LAG(sale_date) OVER (ORDER BY sale_date) AS gap_days
    FROM daily
)
SELECT
    prev_date   AS gap_start,
    sale_date   AS gap_end,
    gap_days    AS days_without_sales
FROM with_gaps
WHERE gap_days > 1
ORDER BY gap_days DESC;

-- Pattern 22 : Sessionization (regroupe les événements proches en sessions)
-- On simule des timestamps d'accès pour l'exemple
WITH events AS (
    SELECT
        customer_id,
        sale_date + (ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY sale_date) * INTERVAL '2 hours') AS event_time
    FROM sales
),
with_prev AS (
    SELECT
        customer_id,
        event_time,
        LAG(event_time) OVER (PARTITION BY customer_id ORDER BY event_time) AS prev_event_time,
        EXTRACT(EPOCH FROM (
            event_time - LAG(event_time) OVER (PARTITION BY customer_id ORDER BY event_time)
        )) / 60                          AS minutes_since_prev
    FROM events
),
session_flags AS (
    SELECT
        *,
        CASE WHEN minutes_since_prev > 30 OR minutes_since_prev IS NULL
             THEN 1 ELSE 0 END          AS is_new_session
    FROM with_prev
)
SELECT
    customer_id,
    event_time,
    SUM(is_new_session) OVER (
        PARTITION BY customer_id
        ORDER BY event_time
    )                                   AS session_number,
    minutes_since_prev
FROM session_flags
ORDER BY customer_id, event_time;

-- Pattern 23 : Percentile par catégorie (PERCENT_RANK + seuils)
SELECT
    product_id,
    category,
    amount,
    ROUND(
        PERCENT_RANK() OVER (
            PARTITION BY category
            ORDER BY amount
        ) * 100, 1
    )                               AS pct_rank_in_category,
    CASE
        WHEN PERCENT_RANK() OVER (PARTITION BY category ORDER BY amount) >= 0.9
             THEN 'Top 10%'
        WHEN PERCENT_RANK() OVER (PARTITION BY category ORDER BY amount) >= 0.75
             THEN 'Top 25%'
        WHEN PERCENT_RANK() OVER (PARTITION BY category ORDER BY amount) >= 0.5
             THEN 'Top 50%'
        ELSE 'Bottom 50%'
    END                             AS performance_tier
FROM sales
ORDER BY category, amount DESC;

-- Pattern 24 : ABC Analysis (Pareto — 80/20)
WITH product_revenue AS (
    SELECT
        product_id,
        category,
        SUM(amount)                     AS total_rev
    FROM sales
    GROUP BY product_id, category
),
with_cumulative AS (
    SELECT
        *,
        SUM(total_rev) OVER (ORDER BY total_rev DESC)  AS cumulative_rev,
        SUM(total_rev) OVER ()                          AS grand_total
    FROM product_revenue
)
SELECT
    product_id,
    category,
    ROUND(total_rev, 2)            AS revenue,
    ROUND(cumulative_rev / grand_total * 100, 1) AS cumulative_pct,
    CASE
        WHEN cumulative_rev / grand_total <= 0.80 THEN 'A — Prioritaire (80%)'
        WHEN cumulative_rev / grand_total <= 0.95 THEN 'B — Important (15%)'
        ELSE                                           'C — Marginal (5%)'
    END                            AS abc_class
FROM with_cumulative
ORDER BY total_rev DESC;

-- Pattern 25 : Time-weighted average (moyenne pondérée par durée)
-- Utile pour le prix moyen pondéré d'un actif sur une période
WITH priced_intervals AS (
    SELECT
        sale_date,
        amount                                                  AS price,
        LEAD(sale_date) OVER (ORDER BY sale_date)              AS next_date,
        LEAD(sale_date) OVER (ORDER BY sale_date) - sale_date  AS holding_days
    FROM sales
    WHERE product_id = 'P01'   -- Focus sur un produit
)
SELECT
    SUM(price * holding_days)::NUMERIC /
    NULLIF(SUM(holding_days)::NUMERIC, 0)     AS time_weighted_avg_price,
    MIN(price)                                 AS min_price,
    MAX(price)                                 AS max_price,
    AVG(price)                                 AS simple_avg_price,
    SUM(holding_days)                          AS total_days_observed
FROM priced_intervals
WHERE holding_days IS NOT NULL;

📥 Télécharger ce template

Télécharger le fichier .md · ← Retour catalogue · Parcours Data →

ITAG · Non-affiliés Snowflake / Google / Oracle / Microsoft.