# đŸ—„ïž SchĂ©ma Data Warehouse Ă©toile — SQL **Format** : SQL · PostgreSQL + Snowflake + BigQuery **Auteur** : Équipe pĂ©dagogique ITAG · Data Pack **Mise Ă  jour** : 2026 --- ## 🎯 Description SchĂ©ma star (modĂ©lisation Kimball) prĂȘt Ă  dĂ©ployer pour data warehouse. Tables de faits + dimensions, partitioning, indexes, vues matĂ©rialisĂ©es. Compatible PostgreSQL, Snowflake, BigQuery, Redshift. ## 📋 Contenu (4 fichiers SQL) ### 1. `01_dimensions.sql` - `dim_date` (avec hiĂ©rarchie annĂ©e / trimestre / mois / semaine / jour) - `dim_customer` (SCD Type 2 avec valid_from / valid_to) - `dim_product` (avec catĂ©gories hiĂ©rarchiques) - `dim_geography` (pays / rĂ©gion / ville) - `dim_channel` (online / offline / partenaire) ### 2. `02_facts.sql` - `fact_sales` (partitionnĂ©e par date, FK vers toutes les dimensions) - `fact_inventory` (snapshot quotidien) - `fact_web_events` (granulaire, pour funnel analysis) ### 3. `03_indexes.sql` - Indexes B-tree sur clĂ©s de jointure - Indexes BRIN pour trĂšs gros volumes (PostgreSQL) - Clustering keys (Snowflake) ### 4. `04_views.sql` - `vw_sales_daily` — agrĂ©gation prĂ©-calculĂ©e - `vw_customer_360` — vue dĂ©normalisĂ©e - `vw_cohort_retention` — analyse rĂ©tention ## đŸ’Ÿ SchĂ©ma SQL complet ### Dimension — dim_customer (SCD Type 2) ```sql CREATE TABLE dim_customer ( customer_key SERIAL PRIMARY KEY, customer_id VARCHAR(50) NOT NULL, -- ClĂ© naturelle (source) name VARCHAR(200) NOT NULL, email VARCHAR(200), country VARCHAR(100), city VARCHAR(100), segment VARCHAR(50), -- B2B / B2C / SME / Enterprise created_at DATE, valid_from DATE NOT NULL DEFAULT CURRENT_DATE, valid_to DATE DEFAULT '9999-12-31', is_current BOOLEAN DEFAULT TRUE, _etl_loaded TIMESTAMP DEFAULT NOW() ); INSERT INTO dim_customer (customer_id, name, email, country, city, segment, created_at) VALUES ('C001', 'Acme Corp', 'contact@acme.com', 'France', 'Paris', 'Enterprise', '2022-01-15'), ('C002', 'Dupont SARL', 'info@dupont.fr', 'France', 'Lyon', 'SME', '2022-03-20'), ('C003', 'TechStart Ltd', 'hello@techstart.io', 'Canada', 'MontrĂ©al', 'B2B', '2022-06-01'), ('C004', 'Marie Curie', 'marie@example.com', 'Belgique', 'Bruxelles', 'B2C', '2023-01-10'), ('C005', 'Global Trade SA', 'trading@global.com', 'SĂ©nĂ©gal', 'Dakar', 'Enterprise', '2023-02-14'), ('C006', 'InnovateCo', 'ceo@innovate.co', 'Maroc', 'Casablanca', 'SME', '2023-05-22'), ('C007', 'Jean Lefebvre', 'jean.l@mail.fr', 'France', 'Toulouse', 'B2C', '2023-08-30'), ('C008', 'DataPlex Inc', 'ops@dataplex.ca', 'Canada', 'Toronto', 'Enterprise', '2024-01-05'), ('C009', 'Soleil RCI', 'admin@soleil-ci.com', 'CĂŽte d''Ivoire', 'Abidjan', 'B2B', '2024-03-11'), ('C010', 'Atlas Logistic', 'logistics@atlas.ma', 'Maroc', 'Rabat', 'SME', '2024-07-19'); ``` ### Dimension — dim_product ```sql CREATE TABLE dim_product ( product_key SERIAL PRIMARY KEY, product_id VARCHAR(50) NOT NULL, name VARCHAR(200) NOT NULL, category VARCHAR(100), subcategory VARCHAR(100), brand VARCHAR(100), price NUMERIC(12,2), cost NUMERIC(12,2), margin_pct NUMERIC(5,2) GENERATED ALWAYS AS (ROUND((price - cost) / NULLIF(price, 0) * 100, 2)) STORED, is_active BOOLEAN DEFAULT TRUE, _etl_loaded TIMESTAMP DEFAULT NOW() ); INSERT INTO dim_product (product_id, name, category, subcategory, brand, price, cost) VALUES ('P001', 'Formation SQL Expert', 'Formation', 'Data', 'ITAG', 299.00, 45.00), ('P002', 'Certif. Cloud AWS', 'Certification','Infrastructure','ITAG', 499.00, 80.00), ('P003', 'Pack Data Engineer', 'Pack', 'Data', 'ITAG', 799.00, 120.00), ('P004', 'Abonnement Premium 1 an', 'Abonnement', 'AccĂšs', 'ITAG', 199.00, 20.00), ('P005', 'Formation Python', 'Formation', 'Dev', 'ITAG', 249.00, 38.00), ('P006', 'Certif. PMP', 'Certification','Management', 'ITAG', 399.00, 65.00), ('P007', 'Formation Excel AvancĂ©', 'Formation', 'Business', 'ITAG', 149.00, 22.00), ('P008', 'Pack Langue DELF B2', 'Pack', 'Langue', 'ITAG', 349.00, 55.00), ('P009', 'Abonnement Équipe 5 users','Abonnement', 'AccĂšs', 'ITAG', 799.00, 60.00), ('P010', 'Formation NCLEX-RN', 'Formation', 'SantĂ©', 'ITAG', 549.00, 90.00); ``` ### Dimension — dim_date ```sql CREATE TABLE dim_date ( date_key INT PRIMARY KEY, -- Format YYYYMMDD : 20240101 full_date DATE NOT NULL, year SMALLINT, quarter SMALLINT, month SMALLINT, month_name VARCHAR(20), week SMALLINT, day_of_week SMALLINT, -- 0=Dimanche, 6=Samedi day_name VARCHAR(20), is_weekend BOOLEAN DEFAULT FALSE, is_holiday BOOLEAN DEFAULT FALSE ); INSERT INTO dim_date (date_key, full_date, year, quarter, month, month_name, week, day_of_week, day_name, is_weekend) VALUES (20240101, '2024-01-01', 2024, 1, 1, 'Janvier', 1, 1, 'Lundi', FALSE), (20240201, '2024-02-01', 2024, 1, 2, 'FĂ©vrier', 5, 4, 'Jeudi', FALSE), (20240301, '2024-03-01', 2024, 1, 3, 'Mars', 9, 5, 'Vendredi',FALSE), (20240401, '2024-04-01', 2024, 2, 4, 'Avril', 14, 1, 'Lundi', FALSE), (20240501, '2024-05-01', 2024, 2, 5, 'Mai', 18, 3, 'Mercredi',FALSE), (20240601, '2024-06-01', 2024, 2, 6, 'Juin', 22, 6, 'Samedi', TRUE), (20240701, '2024-07-01', 2024, 3, 7, 'Juillet', 27, 1, 'Lundi', FALSE), (20240801, '2024-08-01', 2024, 3, 8, 'AoĂ»t', 31, 4, 'Jeudi', FALSE), (20240901, '2024-09-01', 2024, 3, 9, 'Septembre',35, 0, 'Dimanche',TRUE), (20241001, '2024-10-01', 2024, 4, 10, 'Octobre', 40, 2, 'Mardi', FALSE); ``` ### Dimension — dim_geography ```sql CREATE TABLE dim_geography ( geo_key SERIAL PRIMARY KEY, country_code CHAR(2) NOT NULL, -- ISO 3166-1 alpha-2 country VARCHAR(100) NOT NULL, region VARCHAR(100), city VARCHAR(100), timezone VARCHAR(50) ); INSERT INTO dim_geography (country_code, country, region, city, timezone) VALUES ('FR', 'France', 'Europe de l''Ouest', 'Paris', 'Europe/Paris'), ('FR', 'France', 'Europe de l''Ouest', 'Lyon', 'Europe/Paris'), ('FR', 'France', 'Europe de l''Ouest', 'Toulouse', 'Europe/Paris'), ('CA', 'Canada', 'AmĂ©rique du Nord', 'MontrĂ©al', 'America/Montreal'), ('CA', 'Canada', 'AmĂ©rique du Nord', 'Toronto', 'America/Toronto'), ('BE', 'Belgique', 'Europe de l''Ouest', 'Bruxelles', 'Europe/Brussels'), ('SN', 'SĂ©nĂ©gal', 'Afrique de l''Ouest','Dakar', 'Africa/Dakar'), ('MA', 'Maroc', 'Afrique du Nord', 'Casablanca', 'Africa/Casablanca'), ('MA', 'Maroc', 'Afrique du Nord', 'Rabat', 'Africa/Casablanca'), ('CI', 'CĂŽte d''Ivoire','Afrique de l''Ouest','Abidjan', 'Africa/Abidjan'); ``` ### Table de faits — fact_sales ```sql CREATE TABLE fact_sales ( sale_id BIGSERIAL PRIMARY KEY, date_key INT NOT NULL REFERENCES dim_date(date_key), customer_key INT NOT NULL REFERENCES dim_customer(customer_key), product_key INT NOT NULL REFERENCES dim_product(product_key), geo_key INT NOT NULL REFERENCES dim_geography(geo_key), quantity INT NOT NULL DEFAULT 1, unit_price NUMERIC(12,2) NOT NULL, revenue NUMERIC(12,2) NOT NULL, -- quantity × unit_price cost NUMERIC(12,2), margin NUMERIC(12,2), -- revenue - cost discount_pct NUMERIC(5,2) DEFAULT 0, channel VARCHAR(50), -- 'web', 'mobile', 'partner' _etl_loaded TIMESTAMP DEFAULT NOW() ) PARTITION BY RANGE (date_key); -- Partitions par annĂ©e CREATE TABLE fact_sales_2023 PARTITION OF fact_sales FOR VALUES FROM (20230101) TO (20231231); CREATE TABLE fact_sales_2024 PARTITION OF fact_sales FOR VALUES FROM (20240101) TO (20241231); INSERT INTO fact_sales (date_key, customer_key, product_key, geo_key, quantity, unit_price, revenue, cost, margin, discount_pct, channel) VALUES (20240101, 1, 1, 1, 1, 299.00, 299.00, 45.00, 254.00, 0, 'web'), (20240201, 2, 4, 2, 2, 199.00, 398.00, 40.00, 358.00, 0, 'web'), (20240301, 3, 2, 4, 1, 499.00, 499.00, 80.00, 419.00, 0, 'mobile'), (20240401, 4, 5, 6, 1, 249.00, 249.00, 38.00, 211.00, 10.0, 'web'), (20240501, 5, 3, 7, 1, 799.00, 799.00, 120.00, 679.00, 0, 'partner'), (20240601, 6, 6, 8, 1, 399.00, 399.00, 65.00, 334.00, 5.0, 'web'), (20240701, 7, 7, 3, 3, 149.00, 447.00, 66.00, 381.00, 0, 'mobile'), (20240801, 8, 9, 5, 1, 799.00, 799.00, 60.00, 739.00, 0, 'partner'), (20240901, 9, 10, 10, 1, 549.00, 549.00, 90.00, 459.00, 15.0, 'web'), (20241001, 10, 8, 9, 1, 349.00, 349.00, 55.00, 294.00, 0, 'web'); ``` ### Table de faits — fact_events (Web analytics) ```sql CREATE TABLE fact_events ( event_id BIGSERIAL PRIMARY KEY, date_key INT NOT NULL REFERENCES dim_date(date_key), customer_key INT REFERENCES dim_customer(customer_key), event_type VARCHAR(50) NOT NULL, -- 'page_view', 'click', 'purchase', 'signup' page VARCHAR(200), session_id VARCHAR(100), device VARCHAR(30), -- 'desktop', 'mobile', 'tablet' duration_sec INT, _etl_loaded TIMESTAMP DEFAULT NOW() ); INSERT INTO fact_events (date_key, customer_key, event_type, page, session_id, device, duration_sec) VALUES (20240101, 1, 'page_view', '/cours/sql-avance', 'sess_a1b2', 'desktop', 245), (20240101, 1, 'click', '/cours/sql-avance#acheter','sess_a1b2', 'desktop', 5), (20240101, 1, 'purchase', '/checkout', 'sess_a1b2', 'desktop', 120), (20240201, 2, 'page_view', '/simulateurs/delf-b2', 'sess_c3d4', 'mobile', 180), (20240201, 2, 'signup', '/inscription', 'sess_c3d4', 'mobile', 90), (20240301, NULL, 'page_view', '/blog/sql-window', 'sess_e5f6', 'desktop', 420), (20240301, 3, 'page_view', '/certifications/aws', 'sess_g7h8', 'tablet', 310), (20240401, 4, 'page_view', '/cours/python', 'sess_i9j0', 'mobile', 195), (20240501, 5, 'page_view', '/parcours/data', 'sess_k1l2', 'desktop', 560), (20240601, NULL, 'page_view', '/accueil', 'sess_m3n4', 'mobile', 45); ``` ### Index — Optimisation des jointures ```sql -- Indexes sur les clĂ©s Ă©trangĂšres de fact_sales CREATE INDEX idx_fact_sales_date_key ON fact_sales (date_key); CREATE INDEX idx_fact_sales_customer_key ON fact_sales (customer_key); CREATE INDEX idx_fact_sales_product_key ON fact_sales (product_key); CREATE INDEX idx_fact_sales_geo_key ON fact_sales (geo_key); CREATE INDEX idx_fact_sales_channel ON fact_sales (channel); -- Index composite pour les requĂȘtes analytiques courantes CREATE INDEX idx_fact_sales_date_product ON fact_sales (date_key, product_key); CREATE INDEX idx_fact_sales_date_customer ON fact_sales (date_key, customer_key); -- Index sur fact_events CREATE INDEX idx_fact_events_date_key ON fact_events (date_key); CREATE INDEX idx_fact_events_customer_key ON fact_events (customer_key); CREATE INDEX idx_fact_events_session_id ON fact_events (session_id); CREATE INDEX idx_fact_events_event_type ON fact_events (event_type); -- Index BRIN pour trĂšs gros volumes (colonnes monotones comme date_key) -- (PostgreSQL uniquement — plus lĂ©ger qu'un B-tree sur partitions larges) CREATE INDEX idx_fact_sales_brin ON fact_sales USING BRIN (date_key); ``` ### Vues — vw_customer_360 ```sql CREATE OR REPLACE VIEW vw_customer_360 AS SELECT c.customer_id, c.name AS customer_name, c.segment, c.country, COUNT(DISTINCT fs.sale_id) AS total_orders, SUM(fs.revenue) AS lifetime_revenue, AVG(fs.revenue) AS avg_order_value, MAX(dd.full_date) AS last_purchase_date, MIN(dd.full_date) AS first_purchase_date, COUNT(DISTINCT fs.product_key) AS distinct_products_bought, -- Segmentation RFM simplifiĂ©e CASE WHEN SUM(fs.revenue) > 1000 AND COUNT(*) > 3 THEN 'Champion' WHEN SUM(fs.revenue) > 500 THEN 'Loyal' WHEN MAX(dd.full_date) > CURRENT_DATE - 90 THEN 'RĂ©cent' ELSE 'À risque' END AS rfm_segment FROM dim_customer c LEFT JOIN fact_sales fs ON c.customer_key = fs.customer_key LEFT JOIN dim_date dd ON fs.date_key = dd.date_key WHERE c.is_current = TRUE GROUP BY c.customer_id, c.name, c.segment, c.country; ``` ### Vue — vw_cohort_retention ```sql CREATE OR REPLACE VIEW vw_cohort_retention AS WITH first_purchase AS ( SELECT customer_key, MIN(date_key) AS cohort_date_key, DATE_TRUNC('month', (SELECT full_date FROM dim_date WHERE date_key = MIN(fs.date_key)) ) AS cohort_month FROM fact_sales fs GROUP BY customer_key ), purchases_by_month AS ( SELECT fs.customer_key, DATE_TRUNC('month', dd.full_date) AS purchase_month FROM fact_sales fs JOIN dim_date dd ON fs.date_key = dd.date_key GROUP BY fs.customer_key, DATE_TRUNC('month', dd.full_date) ), cohort_data AS ( SELECT fp.cohort_month, pbm.purchase_month, EXTRACT(MONTH FROM AGE(pbm.purchase_month, fp.cohort_month))::INT AS months_since_first, COUNT(DISTINCT fp.customer_key) AS cohort_size, COUNT(DISTINCT pbm.customer_key) AS active_customers FROM first_purchase fp JOIN purchases_by_month pbm ON fp.customer_key = pbm.customer_key GROUP BY fp.cohort_month, pbm.purchase_month, EXTRACT(MONTH FROM AGE(pbm.purchase_month, fp.cohort_month)) ) SELECT cohort_month, months_since_first, cohort_size, active_customers, ROUND(active_customers::NUMERIC / NULLIF(cohort_size, 0) * 100, 1) AS retention_pct FROM cohort_data ORDER BY cohort_month, months_since_first; ``` ### Vue — vw_monthly_revenue ```sql CREATE OR REPLACE VIEW vw_monthly_revenue AS SELECT dd.year, dd.month, dd.month_name, p.category AS product_category, g.country, COUNT(DISTINCT fs.sale_id) AS orders, COUNT(DISTINCT fs.customer_key) AS unique_customers, SUM(fs.revenue) AS revenue, SUM(fs.margin) AS gross_margin, ROUND(SUM(fs.margin) / NULLIF(SUM(fs.revenue), 0) * 100, 1) AS margin_pct, -- YTD cumulĂ© via window function SUM(SUM(fs.revenue)) OVER ( PARTITION BY dd.year, p.category ORDER BY dd.month ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS revenue_ytd FROM fact_sales fs JOIN dim_date dd ON fs.date_key = dd.date_key JOIN dim_product p ON fs.product_key = p.product_key JOIN dim_geography g ON fs.geo_key = g.geo_key GROUP BY dd.year, dd.month, dd.month_name, p.category, g.country ORDER BY dd.year, dd.month; ``` ## đŸ’Œ Cas d'usage - DĂ©marrer une plateforme analytics from scratch - Migration legacy DWH vers Snowflake/BigQuery - Pratique pour interview Data Engineer - Architecture lakehouse / dbt project ## đŸ“„ TĂ©lĂ©charger ce template [TĂ©lĂ©charger le fichier .md](/templates/view.php?file=sql-data-warehouse-schema&dl=1) · [← Retour catalogue](/templates.php?cat=sql) · [Parcours Data →](/parcours/tech-data.php) --- *ITAG · Non-affiliĂ©s Snowflake / Google / AWS / dbt Labs.*