📋 ITAG Templates · Catalogue 30+ templates pro · Voir tout
← Retour au catalogue 📥 Télécharger ce template

🗄️ Schéma Data Warehouse étoile — SQL

Format : SQL · PostgreSQL + Snowflake + BigQuery Auteur : Équipe pédagogique ITAG · Data Pack Mise à jour : 2026


🎯 Description

Schéma star (modélisation Kimball) prêt à déployer pour data warehouse. Tables de faits + dimensions, partitioning, indexes, vues matérialisées. Compatible PostgreSQL, Snowflake, BigQuery, Redshift.

📋 Contenu (4 fichiers SQL)

1. 01_dimensions.sql

2. 02_facts.sql

3. 03_indexes.sql

4. 04_views.sql

💾 Schéma SQL complet

Dimension — dim_customer (SCD Type 2)

CREATE TABLE dim_customer (
    customer_key  SERIAL PRIMARY KEY,
    customer_id   VARCHAR(50)  NOT NULL,       -- Clé naturelle (source)
    name          VARCHAR(200) NOT NULL,
    email         VARCHAR(200),
    country       VARCHAR(100),
    city          VARCHAR(100),
    segment       VARCHAR(50),                 -- B2B / B2C / SME / Enterprise
    created_at    DATE,
    valid_from    DATE         NOT NULL DEFAULT CURRENT_DATE,
    valid_to      DATE         DEFAULT '9999-12-31',
    is_current    BOOLEAN      DEFAULT TRUE,
    _etl_loaded   TIMESTAMP    DEFAULT NOW()
);

INSERT INTO dim_customer (customer_id, name, email, country, city, segment, created_at) VALUES ('C001', 'Acme Corp', 'contact@acme.com', 'France', 'Paris', 'Enterprise', '2022-01-15'), ('C002', 'Dupont SARL', 'info@dupont.fr', 'France', 'Lyon', 'SME', '2022-03-20'), ('C003', 'TechStart Ltd', 'hello@techstart.io', 'Canada', 'Montréal', 'B2B', '2022-06-01'), ('C004', 'Marie Curie', 'marie@example.com', 'Belgique', 'Bruxelles', 'B2C', '2023-01-10'), ('C005', 'Global Trade SA', 'trading@global.com', 'Sénégal', 'Dakar', 'Enterprise', '2023-02-14'), ('C006', 'InnovateCo', 'ceo@innovate.co', 'Maroc', 'Casablanca', 'SME', '2023-05-22'), ('C007', 'Jean Lefebvre', 'jean.l@mail.fr', 'France', 'Toulouse', 'B2C', '2023-08-30'), ('C008', 'DataPlex Inc', 'ops@dataplex.ca', 'Canada', 'Toronto', 'Enterprise', '2024-01-05'), ('C009', 'Soleil RCI', 'admin@soleil-ci.com', 'Côte d''Ivoire', 'Abidjan', 'B2B', '2024-03-11'), ('C010', 'Atlas Logistic', 'logistics@atlas.ma', 'Maroc', 'Rabat', 'SME', '2024-07-19');

Dimension — dim_product

CREATE TABLE dim_product (
    product_key   SERIAL PRIMARY KEY,
    product_id    VARCHAR(50)  NOT NULL,
    name          VARCHAR(200) NOT NULL,
    category      VARCHAR(100),
    subcategory   VARCHAR(100),
    brand         VARCHAR(100),
    price         NUMERIC(12,2),
    cost          NUMERIC(12,2),
    margin_pct    NUMERIC(5,2) GENERATED ALWAYS AS
                  (ROUND((price - cost) / NULLIF(price, 0) * 100, 2)) STORED,
    is_active     BOOLEAN DEFAULT TRUE,
    _etl_loaded   TIMESTAMP DEFAULT NOW()
);

INSERT INTO dim_product (product_id, name, category, subcategory, brand, price, cost) VALUES ('P001', 'Formation SQL Expert', 'Formation', 'Data', 'ITAG', 299.00, 45.00), ('P002', 'Certif. Cloud AWS', 'Certification','Infrastructure','ITAG', 499.00, 80.00), ('P003', 'Pack Data Engineer', 'Pack', 'Data', 'ITAG', 799.00, 120.00), ('P004', 'Abonnement Premium 1 an', 'Abonnement', 'Accès', 'ITAG', 199.00, 20.00), ('P005', 'Formation Python', 'Formation', 'Dev', 'ITAG', 249.00, 38.00), ('P006', 'Certif. PMP', 'Certification','Management', 'ITAG', 399.00, 65.00), ('P007', 'Formation Excel Avancé', 'Formation', 'Business', 'ITAG', 149.00, 22.00), ('P008', 'Pack Langue DELF B2', 'Pack', 'Langue', 'ITAG', 349.00, 55.00), ('P009', 'Abonnement Équipe 5 users','Abonnement', 'Accès', 'ITAG', 799.00, 60.00), ('P010', 'Formation NCLEX-RN', 'Formation', 'Santé', 'ITAG', 549.00, 90.00);

Dimension — dim_date

CREATE TABLE dim_date (
    date_key     INT PRIMARY KEY,   -- Format YYYYMMDD : 20240101
    full_date    DATE NOT NULL,
    year         SMALLINT,
    quarter      SMALLINT,
    month        SMALLINT,
    month_name   VARCHAR(20),
    week         SMALLINT,
    day_of_week  SMALLINT,          -- 0=Dimanche, 6=Samedi
    day_name     VARCHAR(20),
    is_weekend   BOOLEAN DEFAULT FALSE,
    is_holiday   BOOLEAN DEFAULT FALSE
);

INSERT INTO dim_date (date_key, full_date, year, quarter, month, month_name, week, day_of_week, day_name, is_weekend) VALUES (20240101, '2024-01-01', 2024, 1, 1, 'Janvier', 1, 1, 'Lundi', FALSE), (20240201, '2024-02-01', 2024, 1, 2, 'Février', 5, 4, 'Jeudi', FALSE), (20240301, '2024-03-01', 2024, 1, 3, 'Mars', 9, 5, 'Vendredi',FALSE), (20240401, '2024-04-01', 2024, 2, 4, 'Avril', 14, 1, 'Lundi', FALSE), (20240501, '2024-05-01', 2024, 2, 5, 'Mai', 18, 3, 'Mercredi',FALSE), (20240601, '2024-06-01', 2024, 2, 6, 'Juin', 22, 6, 'Samedi', TRUE), (20240701, '2024-07-01', 2024, 3, 7, 'Juillet', 27, 1, 'Lundi', FALSE), (20240801, '2024-08-01', 2024, 3, 8, 'Août', 31, 4, 'Jeudi', FALSE), (20240901, '2024-09-01', 2024, 3, 9, 'Septembre',35, 0, 'Dimanche',TRUE), (20241001, '2024-10-01', 2024, 4, 10, 'Octobre', 40, 2, 'Mardi', FALSE);

Dimension — dim_geography

CREATE TABLE dim_geography (
    geo_key      SERIAL PRIMARY KEY,
    country_code CHAR(2)      NOT NULL,  -- ISO 3166-1 alpha-2
    country      VARCHAR(100) NOT NULL,
    region       VARCHAR(100),
    city         VARCHAR(100),
    timezone     VARCHAR(50)
);

INSERT INTO dim_geography (country_code, country, region, city, timezone) VALUES ('FR', 'France', 'Europe de l''Ouest', 'Paris', 'Europe/Paris'), ('FR', 'France', 'Europe de l''Ouest', 'Lyon', 'Europe/Paris'), ('FR', 'France', 'Europe de l''Ouest', 'Toulouse', 'Europe/Paris'), ('CA', 'Canada', 'Amérique du Nord', 'Montréal', 'America/Montreal'), ('CA', 'Canada', 'Amérique du Nord', 'Toronto', 'America/Toronto'), ('BE', 'Belgique', 'Europe de l''Ouest', 'Bruxelles', 'Europe/Brussels'), ('SN', 'Sénégal', 'Afrique de l''Ouest','Dakar', 'Africa/Dakar'), ('MA', 'Maroc', 'Afrique du Nord', 'Casablanca', 'Africa/Casablanca'), ('MA', 'Maroc', 'Afrique du Nord', 'Rabat', 'Africa/Casablanca'), ('CI', 'Côte d''Ivoire','Afrique de l''Ouest','Abidjan', 'Africa/Abidjan');

Table de faits — fact_sales

CREATE TABLE fact_sales (
    sale_id       BIGSERIAL    PRIMARY KEY,
    date_key      INT          NOT NULL REFERENCES dim_date(date_key),
    customer_key  INT          NOT NULL REFERENCES dim_customer(customer_key),
    product_key   INT          NOT NULL REFERENCES dim_product(product_key),
    geo_key       INT          NOT NULL REFERENCES dim_geography(geo_key),
    quantity      INT          NOT NULL DEFAULT 1,
    unit_price    NUMERIC(12,2) NOT NULL,
    revenue       NUMERIC(12,2) NOT NULL,     -- quantity × unit_price
    cost          NUMERIC(12,2),
    margin        NUMERIC(12,2),              -- revenue - cost
    discount_pct  NUMERIC(5,2) DEFAULT 0,
    channel       VARCHAR(50),               -- 'web', 'mobile', 'partner'
    _etl_loaded   TIMESTAMP DEFAULT NOW()
) PARTITION BY RANGE (date_key);

-- Partitions par année CREATE TABLE fact_sales_2023 PARTITION OF fact_sales FOR VALUES FROM (20230101) TO (20231231); CREATE TABLE fact_sales_2024 PARTITION OF fact_sales FOR VALUES FROM (20240101) TO (20241231);

INSERT INTO fact_sales (date_key, customer_key, product_key, geo_key, quantity, unit_price, revenue, cost, margin, discount_pct, channel) VALUES (20240101, 1, 1, 1, 1, 299.00, 299.00, 45.00, 254.00, 0, 'web'), (20240201, 2, 4, 2, 2, 199.00, 398.00, 40.00, 358.00, 0, 'web'), (20240301, 3, 2, 4, 1, 499.00, 499.00, 80.00, 419.00, 0, 'mobile'), (20240401, 4, 5, 6, 1, 249.00, 249.00, 38.00, 211.00, 10.0, 'web'), (20240501, 5, 3, 7, 1, 799.00, 799.00, 120.00, 679.00, 0, 'partner'), (20240601, 6, 6, 8, 1, 399.00, 399.00, 65.00, 334.00, 5.0, 'web'), (20240701, 7, 7, 3, 3, 149.00, 447.00, 66.00, 381.00, 0, 'mobile'), (20240801, 8, 9, 5, 1, 799.00, 799.00, 60.00, 739.00, 0, 'partner'), (20240901, 9, 10, 10, 1, 549.00, 549.00, 90.00, 459.00, 15.0, 'web'), (20241001, 10, 8, 9, 1, 349.00, 349.00, 55.00, 294.00, 0, 'web');

Table de faits — fact_events (Web analytics)

CREATE TABLE fact_events (
    event_id      BIGSERIAL    PRIMARY KEY,
    date_key      INT          NOT NULL REFERENCES dim_date(date_key),
    customer_key  INT          REFERENCES dim_customer(customer_key),
    event_type    VARCHAR(50)  NOT NULL,  -- 'page_view', 'click', 'purchase', 'signup'
    page          VARCHAR(200),
    session_id    VARCHAR(100),
    device        VARCHAR(30),            -- 'desktop', 'mobile', 'tablet'
    duration_sec  INT,
    _etl_loaded   TIMESTAMP DEFAULT NOW()
);

INSERT INTO fact_events (date_key, customer_key, event_type, page, session_id, device, duration_sec) VALUES (20240101, 1, 'page_view', '/cours/sql-avance', 'sess_a1b2', 'desktop', 245), (20240101, 1, 'click', '/cours/sql-avance#acheter','sess_a1b2', 'desktop', 5), (20240101, 1, 'purchase', '/checkout', 'sess_a1b2', 'desktop', 120), (20240201, 2, 'page_view', '/simulateurs/delf-b2', 'sess_c3d4', 'mobile', 180), (20240201, 2, 'signup', '/inscription', 'sess_c3d4', 'mobile', 90), (20240301, NULL, 'page_view', '/blog/sql-window', 'sess_e5f6', 'desktop', 420), (20240301, 3, 'page_view', '/certifications/aws', 'sess_g7h8', 'tablet', 310), (20240401, 4, 'page_view', '/cours/python', 'sess_i9j0', 'mobile', 195), (20240501, 5, 'page_view', '/parcours/data', 'sess_k1l2', 'desktop', 560), (20240601, NULL, 'page_view', '/accueil', 'sess_m3n4', 'mobile', 45);

Index — Optimisation des jointures

-- Indexes sur les clés étrangères de fact_sales
CREATE INDEX idx_fact_sales_date_key      ON fact_sales (date_key);
CREATE INDEX idx_fact_sales_customer_key  ON fact_sales (customer_key);
CREATE INDEX idx_fact_sales_product_key   ON fact_sales (product_key);
CREATE INDEX idx_fact_sales_geo_key       ON fact_sales (geo_key);
CREATE INDEX idx_fact_sales_channel       ON fact_sales (channel);

-- Index composite pour les requêtes analytiques courantes CREATE INDEX idx_fact_sales_date_product ON fact_sales (date_key, product_key); CREATE INDEX idx_fact_sales_date_customer ON fact_sales (date_key, customer_key);

-- Index sur fact_events CREATE INDEX idx_fact_events_date_key ON fact_events (date_key); CREATE INDEX idx_fact_events_customer_key ON fact_events (customer_key); CREATE INDEX idx_fact_events_session_id ON fact_events (session_id); CREATE INDEX idx_fact_events_event_type ON fact_events (event_type);

-- Index BRIN pour très gros volumes (colonnes monotones comme date_key) -- (PostgreSQL uniquement — plus léger qu'un B-tree sur partitions larges) CREATE INDEX idx_fact_sales_brin ON fact_sales USING BRIN (date_key);

Vues — vw_customer_360

CREATE OR REPLACE VIEW vw_customer_360 AS
SELECT
    c.customer_id,
    c.name                              AS customer_name,
    c.segment,
    c.country,
    COUNT(DISTINCT fs.sale_id)          AS total_orders,
    SUM(fs.revenue)                     AS lifetime_revenue,
    AVG(fs.revenue)                     AS avg_order_value,
    MAX(dd.full_date)                   AS last_purchase_date,
    MIN(dd.full_date)                   AS first_purchase_date,
    COUNT(DISTINCT fs.product_key)      AS distinct_products_bought,
    -- Segmentation RFM simplifiée
    CASE
        WHEN SUM(fs.revenue) > 1000 AND COUNT(*) > 3 THEN 'Champion'
        WHEN SUM(fs.revenue) > 500                   THEN 'Loyal'
        WHEN MAX(dd.full_date) > CURRENT_DATE - 90   THEN 'Récent'
        ELSE 'À risque'
    END                                 AS rfm_segment
FROM dim_customer c
LEFT JOIN fact_sales fs  ON c.customer_key = fs.customer_key
LEFT JOIN dim_date   dd  ON fs.date_key    = dd.date_key
WHERE c.is_current = TRUE
GROUP BY c.customer_id, c.name, c.segment, c.country;

Vue — vw_cohort_retention

CREATE OR REPLACE VIEW vw_cohort_retention AS
WITH first_purchase AS (
    SELECT
        customer_key,
        MIN(date_key)                      AS cohort_date_key,
        DATE_TRUNC('month',
            (SELECT full_date FROM dim_date WHERE date_key = MIN(fs.date_key))
        )                                  AS cohort_month
    FROM fact_sales fs
    GROUP BY customer_key
),
purchases_by_month AS (
    SELECT
        fs.customer_key,
        DATE_TRUNC('month', dd.full_date)  AS purchase_month
    FROM fact_sales fs
    JOIN dim_date dd ON fs.date_key = dd.date_key
    GROUP BY fs.customer_key, DATE_TRUNC('month', dd.full_date)
),
cohort_data AS (
    SELECT
        fp.cohort_month,
        pbm.purchase_month,
        EXTRACT(MONTH FROM AGE(pbm.purchase_month, fp.cohort_month))::INT AS months_since_first,
        COUNT(DISTINCT fp.customer_key)     AS cohort_size,
        COUNT(DISTINCT pbm.customer_key)    AS active_customers
    FROM first_purchase fp
    JOIN purchases_by_month pbm ON fp.customer_key = pbm.customer_key
    GROUP BY fp.cohort_month, pbm.purchase_month,
             EXTRACT(MONTH FROM AGE(pbm.purchase_month, fp.cohort_month))
)
SELECT
    cohort_month,
    months_since_first,
    cohort_size,
    active_customers,
    ROUND(active_customers::NUMERIC / NULLIF(cohort_size, 0) * 100, 1) AS retention_pct
FROM cohort_data
ORDER BY cohort_month, months_since_first;

Vue — vw_monthly_revenue

CREATE OR REPLACE VIEW vw_monthly_revenue AS
SELECT
    dd.year,
    dd.month,
    dd.month_name,
    p.category                               AS product_category,
    g.country,
    COUNT(DISTINCT fs.sale_id)               AS orders,
    COUNT(DISTINCT fs.customer_key)          AS unique_customers,
    SUM(fs.revenue)                          AS revenue,
    SUM(fs.margin)                           AS gross_margin,
    ROUND(SUM(fs.margin) / NULLIF(SUM(fs.revenue), 0) * 100, 1) AS margin_pct,
    -- YTD cumulé via window function
    SUM(SUM(fs.revenue)) OVER (
        PARTITION BY dd.year, p.category
        ORDER BY dd.month
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )                                        AS revenue_ytd
FROM fact_sales fs
JOIN dim_date     dd ON fs.date_key    = dd.date_key
JOIN dim_product  p  ON fs.product_key = p.product_key
JOIN dim_geography g ON fs.geo_key     = g.geo_key
GROUP BY dd.year, dd.month, dd.month_name, p.category, g.country
ORDER BY dd.year, dd.month;

💼 Cas d'usage

📥 Télécharger ce template

Télécharger le fichier .md · ← Retour catalogue · Parcours Data →


ITAG · Non-affiliés Snowflake / Google / AWS / dbt Labs.

Template prêt pour vous

Tous les templates ITAG sont produits par notre équipe pédagogique. Téléchargement gratuit, usage libre.

📋 Catalogue complet 📅 Coach 1:1