🪟 Window functions essentielles — SQL
Format : SQL · 25 patterns · PostgreSQL + MySQL 8 + BigQuery + Snowflake Auteur : Équipe pédagogique ITAG · Data Pack Mise à jour : 2026
🎯 Description
Pack de 25 window functions prêtes à coller, avec données de test, résultat attendu et explication. Couvre LAG, LEAD, ROW_NUMBER, RANK, NTILE, FIRST_VALUE, LAST_VALUE, et patterns analytics avancés.
📋 Patterns inclus
Ranking & numbering (5)
ROW_NUMBER()— déduplicationRANK()vsDENSE_RANK()— gestion des tiesNTILE(n)— quantiles (quartiles, déciles)PERCENT_RANK()— percentiles- Top N par groupe (PARTITION BY)
Lag / lead (5)
LAG()— comparaison row précédente (growth rate)LEAD()— comparaison row suivante (next value)- Différence consécutive (sales day-over-day)
- Sessions calcul (gap > 30 min)
- Détection de séquences
Aggregations (8)
SUM() OVER— running total- Moving average 7 / 30 jours
- Cumulative percentage
- Rolling max / min
COUNT() OVER— count distinct par fenêtreSTDDEV() OVER— volatilityFIRST_VALUE/LAST_VALUENTH_VALUE
Analytics avancés (7)
- Cohort retention analysis
- Funnel conversion par étape
- RFM segmentation (Recency, Frequency, Monetary)
- Customer lifetime value (CLV)
- Churn detection
- ABC analysis (Pareto)
- Time-weighted average
💼 Cas d'usage
- Préparation interview Data Analyst / Scientist
- Examen Snowflake SnowPro / BigQuery
- Migrations Excel → SQL (analystes business)
- Tutoring SQL avancé
💾 25 patterns Window Functions
Données de test
-- Table unique pour tous les 25 patterns
CREATE TABLE sales (
id SERIAL PRIMARY KEY,
sale_date DATE NOT NULL,
product_id VARCHAR(20) NOT NULL,
category VARCHAR(50) NOT NULL,
amount NUMERIC(10,2) NOT NULL,
customer_id VARCHAR(20) NOT NULL
);
INSERT INTO sales (sale_date, product_id, category, amount, customer_id) VALUES
('2024-01-01', 'P01', 'Formation', 299.00, 'C01'),
('2024-01-01', 'P02', 'Formation', 249.00, 'C02'),
('2024-01-02', 'P03', 'Certification',499.00, 'C03'),
('2024-01-02', 'P01', 'Formation', 299.00, 'C04'),
('2024-01-03', 'P04', 'Abonnement', 199.00, 'C01'),
('2024-01-03', 'P02', 'Formation', 249.00, 'C05'),
('2024-01-04', 'P03', 'Certification',499.00, 'C06'),
('2024-01-04', 'P05', 'Formation', 149.00, 'C02'),
('2024-01-05', 'P01', 'Formation', 299.00, 'C07'),
('2024-01-05', 'P04', 'Abonnement', 199.00, 'C08'),
('2024-01-06', 'P06', 'Certification',399.00, 'C03'),
('2024-01-07', 'P02', 'Formation', 249.00, 'C09'),
('2024-01-08', 'P03', 'Certification',499.00, 'C10'),
('2024-01-09', 'P01', 'Formation', 299.00, 'C01'),
('2024-01-10', 'P04', 'Abonnement', 199.00, 'C05'),
('2024-01-11', 'P07', 'Pack', 799.00, 'C04'),
('2024-01-12', 'P02', 'Formation', 249.00, 'C06'),
('2024-01-13', 'P06', 'Certification',399.00, 'C07'),
('2024-01-14', 'P01', 'Formation', 299.00, 'C08'),
('2024-01-15', 'P03', 'Certification',499.00, 'C02');
Bloc 1 — Ranking (patterns 1-5)
-- Pattern 1 : ROW_NUMBER — déduplication (garde la vente la plus récente par client)
SELECT *
FROM (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY customer_id
ORDER BY sale_date DESC
) AS rn
FROM sales
) t
WHERE rn = 1;
-- Résultat : 1 ligne par customer_id, la plus récente
-- Pattern 2 : RANK vs DENSE_RANK — gestion des ex-aequo
SELECT
product_id,
category,
SUM(amount) AS total_revenue,
RANK() OVER (ORDER BY SUM(amount) DESC) AS rank_with_gaps,
DENSE_RANK() OVER (ORDER BY SUM(amount) DESC) AS rank_no_gaps
FROM sales
GROUP BY product_id, category
ORDER BY total_revenue DESC;
-- RANK : 1,2,2,4 (saute le 3 après un ex-aequo)
-- DENSE_RANK : 1,2,2,3 (pas de saut)
-- Pattern 3 : NTILE — découpage en quartiles
SELECT
customer_id,
SUM(amount) AS customer_total,
NTILE(4) OVER (
ORDER BY SUM(amount) DESC
) AS quartile, -- Q1=top 25%
NTILE(10) OVER (
ORDER BY SUM(amount) DESC
) AS decile
FROM sales
GROUP BY customer_id
ORDER BY customer_total DESC;
-- Pattern 4 : PERCENT_RANK — position en percentile (0.0 à 1.0)
SELECT
sale_date,
amount,
ROUND(
PERCENT_RANK() OVER (ORDER BY amount) * 100, 1
) AS percentile_rank
FROM sales
ORDER BY amount DESC;
-- Interprétation : 95.0 = cette vente est dans le top 5%
-- Pattern 5 : Top N par groupe (PARTITION BY)
SELECT *
FROM (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY category
ORDER BY amount DESC
) AS rank_in_category
FROM sales
) t
WHERE rank_in_category <= 3;
-- Résultat : Top 3 des ventes par catégorie
Bloc 2 — Running totals (patterns 6-10)
-- Pattern 6 : Cumulative SUM — running total par date
SELECT
sale_date,
amount,
SUM(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total,
SUM(amount) OVER () AS grand_total
FROM sales
ORDER BY sale_date;
-- Pattern 7 : Running average — moyenne cumulée
SELECT
sale_date,
amount,
ROUND(
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
), 2
) AS running_avg
FROM sales
ORDER BY sale_date;
-- Pattern 8 : Rolling MAX et MIN (sur les 7 dernières lignes)
SELECT
sale_date,
amount,
MAX(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS rolling_max_7,
MIN(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS rolling_min_7
FROM sales
ORDER BY sale_date;
-- Pattern 9 : YTD (Year-to-Date) par catégorie
SELECT
sale_date,
category,
amount,
SUM(amount) OVER (
PARTITION BY category, EXTRACT(YEAR FROM sale_date)
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS ytd_by_category
FROM sales
ORDER BY category, sale_date;
-- Pattern 10 : Cumulative percentage (part du total cumulée)
SELECT
sale_date,
amount,
SUM(amount) OVER (ORDER BY sale_date) AS running_total,
SUM(amount) OVER () AS grand_total,
ROUND(
SUM(amount) OVER (ORDER BY sale_date) /
SUM(amount) OVER () * 100, 1
) AS cumulative_pct
FROM sales
ORDER BY sale_date;
Bloc 3 — Lead / Lag (patterns 11-15)
-- Pattern 11 : LAG — variation J-1 (day-over-day)
WITH daily AS (
SELECT sale_date, SUM(amount) AS daily_revenue
FROM sales
GROUP BY sale_date
)
SELECT
sale_date,
daily_revenue,
LAG(daily_revenue, 1) OVER (ORDER BY sale_date) AS prev_day_revenue,
ROUND(
(daily_revenue - LAG(daily_revenue, 1) OVER (ORDER BY sale_date))
/ NULLIF(LAG(daily_revenue, 1) OVER (ORDER BY sale_date), 0) * 100, 1
) AS dod_pct_change
FROM daily
ORDER BY sale_date;
-- Pattern 12 : LEAD — voir la prochaine valeur (next day preview)
WITH daily AS (
SELECT sale_date, SUM(amount) AS daily_revenue
FROM sales
GROUP BY sale_date
)
SELECT
sale_date,
daily_revenue,
LEAD(daily_revenue, 1) OVER (ORDER BY sale_date) AS next_day_revenue,
LEAD(sale_date, 1) OVER (ORDER BY sale_date) AS next_day
FROM daily
ORDER BY sale_date;
-- Pattern 13 : MoM% (Month-over-Month) avec LAG décalage de N jours
WITH monthly AS (
SELECT
DATE_TRUNC('month', sale_date) AS month,
SUM(amount) AS monthly_revenue
FROM sales
GROUP BY DATE_TRUNC('month', sale_date)
)
SELECT
month,
monthly_revenue,
LAG(monthly_revenue) OVER (ORDER BY month) AS prev_month_revenue,
ROUND(
(monthly_revenue - LAG(monthly_revenue) OVER (ORDER BY month))
/ NULLIF(LAG(monthly_revenue) OVER (ORDER BY month), 0) * 100, 1
) AS mom_pct
FROM monthly
ORDER BY month;
-- Pattern 14 : Moving average 7 jours (sur les 6 précédents + courant)
WITH daily AS (
SELECT sale_date, SUM(amount) AS daily_revenue
FROM sales
GROUP BY sale_date
)
SELECT
sale_date,
daily_revenue,
ROUND(
AVG(daily_revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
), 2
) AS ma_7d,
ROUND(
AVG(daily_revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
), 2
) AS ma_30d
FROM daily
ORDER BY sale_date;
-- Pattern 15 : Comparaison avec la ligne précédente (même produit)
SELECT
sale_date,
product_id,
amount,
LAG(amount) OVER (
PARTITION BY product_id
ORDER BY sale_date
) AS prev_sale_same_product,
amount - LAG(amount) OVER (
PARTITION BY product_id
ORDER BY sale_date
) AS diff_vs_prev
FROM sales
ORDER BY product_id, sale_date;
Bloc 4 — Frames (patterns 16-20)
-- Pattern 16 : Moving average pondérée (weighted avg sur 7j — poids décroissants)
WITH daily AS (
SELECT sale_date, SUM(amount) AS rev
FROM sales GROUP BY sale_date
),
weights AS (
SELECT
sale_date, rev,
ROW_NUMBER() OVER (ORDER BY sale_date) AS rn
FROM daily
)
SELECT
sale_date,
rev,
-- Approximation : on utilise un CASE pour des poids explicites
ROUND(
AVG(rev) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
), 2
) AS simple_ma_7d
-- Pour un vrai WMA, voir Pattern avancé 25
FROM daily
ORDER BY sale_date;
-- Pattern 17 : FIRST_VALUE et LAST_VALUE dans une fenêtre
SELECT
sale_date,
category,
amount,
FIRST_VALUE(amount) OVER (
PARTITION BY category
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS first_sale_in_category,
LAST_VALUE(amount) OVER (
PARTITION BY category
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_sale_in_category
FROM sales
ORDER BY category, sale_date;
-- Pattern 18 : NTH_VALUE — la 2e valeur dans chaque groupe
SELECT
sale_date,
category,
amount,
NTH_VALUE(amount, 2) OVER (
PARTITION BY category
ORDER BY amount DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS second_highest_in_category
FROM sales
ORDER BY category, amount DESC;
-- Pattern 19 : Médiane glissante (approximation avec PERCENTILE_CONT)
-- Note : PERCENTILE_CONT est une ordered-set aggregate, pas une window function pure.
-- On l'utilise en sous-requête agrégée ici.
SELECT
category,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY amount) AS median_amount,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY amount) AS p25,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY amount) AS p75
FROM sales
GROUP BY category;
-- Pattern 20 : Écart-type glissant (volatilité des ventes)
WITH daily AS (
SELECT sale_date, SUM(amount) AS rev
FROM sales GROUP BY sale_date
)
SELECT
sale_date,
rev,
ROUND(
STDDEV(rev) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
), 2
) AS stddev_7d,
ROUND(
rev / NULLIF(
STDDEV(rev) OVER (
ORDER BY sale_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
), 0
), 2
) AS zscore_30d -- Détection d'anomalies
FROM daily
ORDER BY sale_date;
Bloc 5 — Advanced (patterns 21-25)
-- Pattern 21 : Gap detection (périodes sans ventes > N jours)
WITH daily AS (
SELECT DISTINCT sale_date FROM sales ORDER BY sale_date
),
with_gaps AS (
SELECT
sale_date,
LAG(sale_date) OVER (ORDER BY sale_date) AS prev_date,
sale_date - LAG(sale_date) OVER (ORDER BY sale_date) AS gap_days
FROM daily
)
SELECT
prev_date AS gap_start,
sale_date AS gap_end,
gap_days AS days_without_sales
FROM with_gaps
WHERE gap_days > 1
ORDER BY gap_days DESC;
-- Pattern 22 : Sessionization (regroupe les événements proches en sessions)
-- On simule des timestamps d'accès pour l'exemple
WITH events AS (
SELECT
customer_id,
sale_date + (ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY sale_date) * INTERVAL '2 hours') AS event_time
FROM sales
),
with_prev AS (
SELECT
customer_id,
event_time,
LAG(event_time) OVER (PARTITION BY customer_id ORDER BY event_time) AS prev_event_time,
EXTRACT(EPOCH FROM (
event_time - LAG(event_time) OVER (PARTITION BY customer_id ORDER BY event_time)
)) / 60 AS minutes_since_prev
FROM events
),
session_flags AS (
SELECT
*,
CASE WHEN minutes_since_prev > 30 OR minutes_since_prev IS NULL
THEN 1 ELSE 0 END AS is_new_session
FROM with_prev
)
SELECT
customer_id,
event_time,
SUM(is_new_session) OVER (
PARTITION BY customer_id
ORDER BY event_time
) AS session_number,
minutes_since_prev
FROM session_flags
ORDER BY customer_id, event_time;
-- Pattern 23 : Percentile par catégorie (PERCENT_RANK + seuils)
SELECT
product_id,
category,
amount,
ROUND(
PERCENT_RANK() OVER (
PARTITION BY category
ORDER BY amount
) * 100, 1
) AS pct_rank_in_category,
CASE
WHEN PERCENT_RANK() OVER (PARTITION BY category ORDER BY amount) >= 0.9
THEN 'Top 10%'
WHEN PERCENT_RANK() OVER (PARTITION BY category ORDER BY amount) >= 0.75
THEN 'Top 25%'
WHEN PERCENT_RANK() OVER (PARTITION BY category ORDER BY amount) >= 0.5
THEN 'Top 50%'
ELSE 'Bottom 50%'
END AS performance_tier
FROM sales
ORDER BY category, amount DESC;
-- Pattern 24 : ABC Analysis (Pareto — 80/20)
WITH product_revenue AS (
SELECT
product_id,
category,
SUM(amount) AS total_rev
FROM sales
GROUP BY product_id, category
),
with_cumulative AS (
SELECT
*,
SUM(total_rev) OVER (ORDER BY total_rev DESC) AS cumulative_rev,
SUM(total_rev) OVER () AS grand_total
FROM product_revenue
)
SELECT
product_id,
category,
ROUND(total_rev, 2) AS revenue,
ROUND(cumulative_rev / grand_total * 100, 1) AS cumulative_pct,
CASE
WHEN cumulative_rev / grand_total <= 0.80 THEN 'A — Prioritaire (80%)'
WHEN cumulative_rev / grand_total <= 0.95 THEN 'B — Important (15%)'
ELSE 'C — Marginal (5%)'
END AS abc_class
FROM with_cumulative
ORDER BY total_rev DESC;
-- Pattern 25 : Time-weighted average (moyenne pondérée par durée)
-- Utile pour le prix moyen pondéré d'un actif sur une période
WITH priced_intervals AS (
SELECT
sale_date,
amount AS price,
LEAD(sale_date) OVER (ORDER BY sale_date) AS next_date,
LEAD(sale_date) OVER (ORDER BY sale_date) - sale_date AS holding_days
FROM sales
WHERE product_id = 'P01' -- Focus sur un produit
)
SELECT
SUM(price * holding_days)::NUMERIC /
NULLIF(SUM(holding_days)::NUMERIC, 0) AS time_weighted_avg_price,
MIN(price) AS min_price,
MAX(price) AS max_price,
AVG(price) AS simple_avg_price,
SUM(holding_days) AS total_days_observed
FROM priced_intervals
WHERE holding_days IS NOT NULL;
📥 Télécharger ce template
Télécharger le fichier .md · ← Retour catalogue · Parcours Data →
ITAG · Non-affiliés Snowflake / Google / Oracle / Microsoft.