Forecast improvements

2026-06-11 14:55:33 -04:00
parent 9ff744399f
commit 3b2f51e6b8
5 changed files with 887 additions and 138 deletions
@@ -634,6 +634,52 @@ def forecast_from_curve(curve_params, scale_factor, age_days, horizon_days):
    return np.array(forecasts)


+def forecast_preorder(curve_params, scale_factor, days_until_arrival,
+                      preorder_daily_rate, horizon_days):
+    """
+    Piecewise pre-order forecast: a flat observed pre-order trickle until the
+    product is expected to arrive, then the scaled launch curve from age 0.
+
+    The launch curve was fit on POST-receipt order history, so running it from
+    today (while the product is still weeks from arriving) front-loads full
+    first-week launch volume that hasn't happened yet — the main driver of the
+    ~2.15x preorder over-forecast. Instead we forecast the slow pre-order rate
+    up to the arrival date, then start the curve's day 0 on that date.
+    See FORECAST_FIX_PLAN F4.
+
+    Args:
+        curve_params: (amplitude, decay_rate, baseline, ...) weekly curve; first 3 are read here
+        scale_factor: per-product multiplier for the post-arrival curve envelope
+        days_until_arrival: calendar days from today until expected arrival; clamped to [0, horizon_days]
+        preorder_daily_rate: observed pre-order units/day (trickle); negative values floor to 0
+        horizon_days: forecast horizon length
+
+    Returns:
+        numpy array of daily forecast values of length horizon_days
+    """
+    amplitude, decay_rate, baseline = curve_params[:3]  # decay_rate is not used directly below
+    forecasts = np.zeros(horizon_days)
+
+    # Clamp the arrival offset to [0, horizon_days] and truncate to whole days
+    dua = int(max(0, min(days_until_arrival, horizon_days)))
+
+    # Pre-arrival segment: flat pre-order trickle, capped at the curve's scaled
+    # week-0 daily value (a pre-order day shouldn't out-sell the launch peak).
+    if dua > 0:
+        week0_daily = (amplitude / 7.0) * scale_factor + (baseline / 7.0)
+        pre_rate = preorder_daily_rate
+        if week0_daily > 0:  # the cap is applied only when the week-0 value is positive
+            pre_rate = min(pre_rate, week0_daily)
+        forecasts[:dua] = max(0.0, pre_rate)
+
+    # Post-arrival segment: scaled launch curve, curve day 0 = arrival date.
+    if dua < horizon_days:
+        curve_part = forecast_from_curve(curve_params, scale_factor, 0, horizon_days - dua)  # age_days=0
+        forecasts[dua:] = curve_part
+
+    return forecasts
+
+
 # ---------------------------------------------------------------------------
 # Batch data loading (eliminates N+1 per-product queries)
 # ---------------------------------------------------------------------------
@@ -651,9 +697,11 @@ def batch_load_product_data(conn, products):
    data = {
        'preorder_sales': {},
        'preorder_days': {},
+        'preorder_arrival_days': {},
        'launch_sales': {},
        'decay_velocity': {},
        'mature_history': {},
+        'dormant_rate': {},
    }

    # Pre-order sales: orders placed BEFORE first received date
@@ -677,6 +725,39 @@ def batch_load_product_data(conn, products):
            data['preorder_days'][int(row['pid'])] = float(row['preorder_days'])
        log.info(f"Batch loaded pre-order sales for {len(data['preorder_sales'])}/{len(preorder_pids)} preorder products")

+        # Expected arrival per pre-order product, to time the launch curve.
+        # Prefer the soonest expected_date >= today on an open PO; if open POs
+        # exist but none has such a date (past or NULL) assume 7 days; if there's
+        # no open PO at all assume 14 days. See FORECAST_FIX_PLAN F4.
+        arrival_sql = """
+        SELECT pid,
+            MIN(expected_date) FILTER (
+                WHERE expected_date IS NOT NULL AND expected_date >= CURRENT_DATE
+            ) AS future_arrival
+        FROM purchase_orders
+        WHERE pid = ANY(%s)
+          AND status IN ('created', 'ordered', 'electronically_sent', 'receiving_started')
+        GROUP BY pid
+        """
+        adf = execute_query(conn, arrival_sql, [preorder_pids])
+        today = date.today()
+        for _, row in adf.iterrows():
+            pid = int(row['pid'])
+            fa = row['future_arrival']
+            if pd.notna(fa):
+                fa_date = pd.Timestamp(fa).date()
+                data['preorder_arrival_days'][pid] = max(0, (fa_date - today).days)
+            else:
+                data['preorder_arrival_days'][pid] = 7  # open PO, expected_date already past
+        no_po = 0
+        for pid in preorder_pids:
+            if int(pid) not in data['preorder_arrival_days']:
+                data['preorder_arrival_days'][int(pid)] = 14  # no open PO at all
+                no_po += 1
+        log.info(f"Batch loaded preorder arrival for "
+                 f"{len(data['preorder_arrival_days']) - no_po}/{len(preorder_pids)} via open POs, "
+                 f"{no_po} defaulted to 14d")
+
    # Launch sales: first 14 days after first received
    launch_pids = products[products['phase'] == 'launch']['pid'].tolist()
    if launch_pids:
@@ -694,15 +775,23 @@ def batch_load_product_data(conn, products):
            data['launch_sales'][int(row['pid'])] = float(row['total_sold'])
        log.info(f"Batch loaded launch sales for {len(data['launch_sales'])}/{len(launch_pids)} launch products")

-    # Decay recent velocity: average daily sales over last 30 days
+    # Decay recent velocity: TRUE calendar-daily average over the last 30 days.
+    # We divide the summed units by calendar days (clipped to the product's age),
+    # NOT by the number of snapshot rows. Snapshots are sparse and mostly land on
+    # sold-days, so the old AVG(units_sold) averaged over sold-days only and inflated
+    # the decay rate ~4x (measured 1.353 vs true 0.332 units/day). See FORECAST_FIX_PLAN F1.
    decay_pids = products[products['phase'] == 'decay']['pid'].tolist()
    if decay_pids:
        sql = """
-        SELECT dps.pid, AVG(COALESCE(dps.units_sold, 0)) AS avg_daily
+        SELECT dps.pid,
+            SUM(COALESCE(dps.units_sold, 0))::float
+              / GREATEST(LEAST(30, (CURRENT_DATE - pm.date_first_received::date)), 1) AS avg_daily
        FROM daily_product_snapshots dps
+        JOIN product_metrics pm ON pm.pid = dps.pid
        WHERE dps.pid = ANY(%s)
          AND dps.snapshot_date >= CURRENT_DATE - INTERVAL '30 days'
-        GROUP BY dps.pid
+          AND dps.snapshot_date >= pm.date_first_received::date
+        GROUP BY dps.pid, pm.date_first_received
        """
        df = execute_query(conn, sql, [decay_pids])
        for _, row in df.iterrows():
@@ -724,6 +813,25 @@ def batch_load_product_data(conn, products):
            data['mature_history'][int(pid)] = group.copy()
        log.info(f"Batch loaded history for {len(data['mature_history'])}/{len(mature_pids)} mature products")

+    # Dormant trailing order rate: dormant products forecast 0 by default, but
+    # ~11K of them still sell (restocks, promos, long-tail) — ~11% of all demand
+    # currently forecast as a hard zero. Load a trailing-180-day daily order rate
+    # so the dormant branch can carry a small positive rate. See FORECAST_FIX_PLAN F5.
+    dormant_pids = products[products['phase'] == 'dormant']['pid'].tolist()
+    if dormant_pids:
+        sql = """
+        SELECT o.pid, SUM(o.quantity) / 180.0 AS rate
+        FROM orders o
+        WHERE o.pid = ANY(%s)
+          AND o.canceled IS DISTINCT FROM TRUE
+          AND o.date >= CURRENT_DATE - INTERVAL '180 days'
+        GROUP BY o.pid
+        """
+        df = execute_query(conn, sql, [dormant_pids])
+        for _, row in df.iterrows():
+            data['dormant_rate'][int(row['pid'])] = float(row['rate'])
+        log.info(f"Batch loaded dormant order rate for {len(data['dormant_rate'])}/{len(dormant_pids)} dormant products")
+
    return data


@@ -829,11 +937,20 @@ def forecast_mature(product, history_df):
        # Not enough data — flat velocity
        return np.full(FORECAST_HORIZON_DAYS, velocity)

-    # Fill date gaps with 0 sales (days where product had no snapshot = no sales)
+    # Reindex over the FULL calendar window ending yesterday, not just the span
+    # between the first and last snapshot. resample() only covers first→last
+    # snapshot, so leading/trailing quiet periods are absent and the Holt level
+    # is fitted only on the product's busy span (can run ~4x too high). An
+    # explicit reindex fills every quiet calendar day with 0. (pid, snapshot_date)
+    # is unique so there is no duplicate-index risk; do NOT use combine_first
+    # (it keeps zeros over real data). See FORECAST_FIX_PLAN F2.
    hist = history_df.copy()
    hist['snapshot_date'] = pd.to_datetime(hist['snapshot_date'])
-    hist = hist.set_index('snapshot_date').resample('D').sum().fillna(0)
-    series = hist['units_sold'].values.astype(float)
+    hist = hist.set_index('snapshot_date')['units_sold']
+    full_index = pd.date_range(
+        end=pd.Timestamp(date.today() - timedelta(days=1)),
+        periods=EXP_SMOOTHING_WINDOW, freq='D')
+    series = hist.reindex(full_index, fill_value=0.0).values.astype(float)

    # Need at least 2 non-zero values for smoothing
    if np.count_nonzero(series) < 2:
@@ -956,9 +1073,24 @@ def generate_all_forecasts(conn, curves_df, dow_indices, monthly_indices=None,
    today = date.today()
    forecast_dates = [today + timedelta(days=i) for i in range(FORECAST_HORIZON_DAYS)]

-    # Pre-compute DOW and seasonal multipliers for each forecast date
+    # Pre-compute DOW and seasonal multipliers for each forecast date.
+    # DOW multipliers stay ABSOLUTE — every calibration is a multi-week average
+    # and therefore DOW-neutral, so reshaping by absolute DOW indices is correct.
+    # Seasonal indices must be applied RELATIVE to the calibration period:
+    # each per-product calibration (decay velocity, mature Holt level, launch /
+    # preorder scale) is fitted on raw recent actuals that already embed the
+    # current month's seasonal level. Multiplying by the absolute target-month
+    # index double-counts seasonality (~25% over-forecast at the May→June sale
+    # transition, worse near November). Divide by the trailing-30-day average
+    # index so only the seasonal *change* from calibration to target applies.
+    # See FORECAST_FIX_PLAN F3.
    dow_multipliers = [dow_indices.get(d.isoweekday(), 1.0) for d in forecast_dates]
-    seasonal_multipliers = [monthly_indices.get(d.month, 1.0) for d in forecast_dates]
+    trailing = [today - timedelta(days=i) for i in range(1, 31)]
+    calibration_index = float(np.mean([monthly_indices.get(d.month, 1.0) for d in trailing]))
+    seasonal_multipliers = [
+        monthly_indices.get(d.month, 1.0) / max(calibration_index, 0.1)
+        for d in forecast_dates
+    ]

    # TRUNCATE before streaming writes
    with conn.cursor() as cur:
@@ -1002,9 +1134,33 @@ def generate_all_forecasts(conn, curves_df, dow_indices, monthly_indices=None,
        try:
            curve_info = get_curve_for_product(product, curves_df)

-            if phase in ('preorder', 'launch'):
+            if phase == 'preorder':
                if curve_info:
-                    scale = compute_scale_factor(phase, product, curve_info, batch_data)
+                    scale = compute_scale_factor('preorder', product, curve_info, batch_data)
+                    # Time the launch curve to expected arrival instead of
+                    # running it from today (F4). Pre-arrival days carry the
+                    # observed pre-order trickle rate.
+                    days_until_arrival = batch_data['preorder_arrival_days'].get(pid, 14)
+                    preorder_units = batch_data['preorder_sales'].get(pid, 0)
+                    preorder_days = batch_data['preorder_days'].get(pid, 1)
+                    preorder_daily_rate = preorder_units / max(preorder_days, 1)
+                    forecasts = forecast_preorder(
+                        curve_info, scale, days_until_arrival,
+                        preorder_daily_rate, FORECAST_HORIZON_DAYS)
+                    method = 'lifecycle_curve'
+                else:
+                    # No reliable curve — fall back to velocity if available
+                    velocity = product.get('sales_velocity_daily') or 0
+                    if velocity > 0:
+                        forecasts = np.full(FORECAST_HORIZON_DAYS, velocity)
+                        method = 'velocity'
+                    else:
+                        forecasts = forecast_dormant()
+                        method = 'zero'
+
+            elif phase == 'launch':
+                if curve_info:
+                    scale = compute_scale_factor('launch', product, curve_info, batch_data)
                    forecasts = forecast_from_curve(curve_info, scale, age, FORECAST_HORIZON_DAYS)
                    method = 'lifecycle_curve'
                else:
@@ -1038,8 +1194,16 @@ def generate_all_forecasts(conn, curves_df, dow_indices, monthly_indices=None,
                method = 'velocity'

            else:  # dormant
-                forecasts = forecast_dormant()
-                method = 'zero'
+                # Carry a small positive rate for dormant products that still
+                # trickle sales (restocks/promos/long-tail); only truly dead
+                # products stay at zero. See FORECAST_FIX_PLAN F5.
+                rate = batch_data['dormant_rate'].get(pid, 0)
+                if rate > 0:
+                    forecasts = np.full(FORECAST_HORIZON_DAYS, rate)
+                    method = 'velocity'
+                else:
+                    forecasts = forecast_dormant()
+                    method = 'zero'

            # Confidence interval: use accuracy-calibrated margins per phase
            base_margin = accuracy_margins.get(phase, 0.5)
@@ -1108,6 +1272,8 @@ def archive_forecasts(conn, run_id):
        """)
        cur.execute("CREATE INDEX IF NOT EXISTS idx_pfh_date ON product_forecasts_history(forecast_date)")
        cur.execute("CREATE INDEX IF NOT EXISTS idx_pfh_pid_date ON product_forecasts_history(pid, forecast_date)")
+        # Naive-baseline column for forecast value-added (FVA). See FORECAST_FIX_PLAN F8.
+        cur.execute("ALTER TABLE product_forecasts_history ADD COLUMN IF NOT EXISTS naive_units NUMERIC(10,2)")

        # Find the previous completed run (whose forecasts are still in product_forecasts)
        cur.execute("""
@@ -1124,15 +1290,27 @@ def archive_forecasts(conn, run_id):

        prev_run_id = prev_run[0]

-        # Archive only past-date forecasts (where actuals now exist)
+        # Archive only past-date forecasts (where actuals now exist). Attach the
+        # naive baseline (flat trailing-28-day daily average) at the same time so
+        # forecast value-added can be measured. See FORECAST_FIX_PLAN F8.
        cur.execute("""
            INSERT INTO product_forecasts_history
                (run_id, pid, forecast_date, forecast_units, forecast_revenue,
-                 lifecycle_phase, forecast_method, confidence_lower, confidence_upper, generated_at)
-            SELECT %s, pid, forecast_date, forecast_units, forecast_revenue,
-                lifecycle_phase, forecast_method, confidence_lower, confidence_upper, generated_at
-            FROM product_forecasts
-            WHERE forecast_date < CURRENT_DATE
+                 lifecycle_phase, forecast_method, confidence_lower, confidence_upper,
+                 generated_at, naive_units)
+            SELECT %s, pf.pid, pf.forecast_date, pf.forecast_units, pf.forecast_revenue,
+                pf.lifecycle_phase, pf.forecast_method, pf.confidence_lower, pf.confidence_upper,
+                pf.generated_at, COALESCE(nv.naive_daily, 0)
+            FROM product_forecasts pf
+            LEFT JOIN (
+                SELECT o.pid, SUM(o.quantity) / 28.0 AS naive_daily
+                FROM orders o
+                WHERE o.canceled IS DISTINCT FROM TRUE
+                  AND o.date >= CURRENT_DATE - INTERVAL '28 days'
+                  AND o.date < CURRENT_DATE
+                GROUP BY o.pid
+            ) nv ON nv.pid = pf.pid
+            WHERE pf.forecast_date < CURRENT_DATE
            ON CONFLICT (run_id, pid, forecast_date) DO NOTHING
        """, (prev_run_id,))

@@ -1154,6 +1332,48 @@ def archive_forecasts(conn, run_id):
        return archived


+def archive_future_leads(conn, run_id):
+    """
+    Archive a sampled set of FUTURE-lead forecasts from the just-generated
+    product_forecasts, attributed to the current run (run_id). Commits on conn
+    and returns the cursor rowcount of the INSERT.
+
+    The past-date archive in archive_forecasts() only captures the 1-day slice
+    that just elapsed, so every accuracy sample lands in the '1-7d' lead bucket
+    and the 15/30/60/90-day forecasts purchasing actually rides on are never
+    validated. Here we snapshot the 7/14/30/60/89-day-ahead leads (non-dormant)
+    so that, once each date passes, compute_accuracy() can score them in their
+    lead bucket; the naive baseline is attached as in the past-date path. Future
+    rows survive the 90-day prune until their date passes. See FORECAST_FIX_PLAN F7.
+    """
+    with conn.cursor() as cur:
+        cur.execute("""
+            INSERT INTO product_forecasts_history
+                (run_id, pid, forecast_date, forecast_units, forecast_revenue,
+                 lifecycle_phase, forecast_method, confidence_lower, confidence_upper,
+                 generated_at, naive_units)
+            SELECT %s, pf.pid, pf.forecast_date, pf.forecast_units, pf.forecast_revenue,
+                pf.lifecycle_phase, pf.forecast_method, pf.confidence_lower, pf.confidence_upper,
+                pf.generated_at, COALESCE(nv.naive_daily, 0)
+            FROM product_forecasts pf
+            LEFT JOIN (
+                SELECT o.pid, SUM(o.quantity) / 28.0 AS naive_daily
+                FROM orders o
+                WHERE o.canceled IS DISTINCT FROM TRUE
+                  AND o.date >= CURRENT_DATE - INTERVAL '28 days'
+                  AND o.date < CURRENT_DATE
+                GROUP BY o.pid
+            ) nv ON nv.pid = pf.pid
+            WHERE pf.lifecycle_phase != 'dormant'
+              AND pf.forecast_date - CURRENT_DATE IN (7, 14, 30, 60, 89)
+            ON CONFLICT (run_id, pid, forecast_date) DO NOTHING
+        """, (run_id,))
+        archived = cur.rowcount  # row count the driver reports for the INSERT above
+    conn.commit()  # commit after the cursor block has exited
+    log.info(f"Archived {archived} future-lead forecast rows (7/14/30/60/89d) for run {run_id}")
+    return archived
+
+
 def compute_accuracy(conn, run_id):
    """
    Compute forecast accuracy metrics from archived history vs. actual sales.
@@ -1162,11 +1382,18 @@ def compute_accuracy(conn, run_id):
    (pid, forecast_date = snapshot_date) to compare forecasted vs. actual units.

    Stores results in forecast_accuracy table, broken down by:
-      - overall: single aggregate row
+      - overall: two rows — 'all' (non-dormant) and 'all_incl_dormant' (F5)
+      - overall_weekly: per-product weekly-grain WMAPE — the informative headline
+        for intermittent demand (daily grain has a ~190% floor) (F9)
      - by_phase: per lifecycle phase
-      - by_lead_time: bucketed by how far ahead the forecast was
+      - by_lead_time: bucketed by how far ahead the forecast was — long-lead
+        buckets populate as the future-lead archives mature (F7)
      - by_method: per forecast method
      - daily: per forecast_date (for trend charts)
+
+    Every dimension also stores naive_wmape (flat trailing-28d baseline) and
+    fva = 1 - wmape/naive_wmape, so the engine can be judged as value-over-naive
+    (F8). Only realized dates (forecast_date < CURRENT_DATE) are scored.
    """
    with conn.cursor() as cur:
        # Ensure accuracy table exists
@@ -1186,6 +1413,10 @@ def compute_accuracy(conn, run_id):
                PRIMARY KEY (run_id, metric_type, dimension_value)
            )
        """)
+        # Naive-baseline WMAPE and forecast value-added (FVA = 1 - wmape/naive_wmape).
+        # See FORECAST_FIX_PLAN F8.
+        cur.execute("ALTER TABLE forecast_accuracy ADD COLUMN IF NOT EXISTS naive_wmape NUMERIC(10,4)")
+        cur.execute("ALTER TABLE forecast_accuracy ADD COLUMN IF NOT EXISTS fva NUMERIC(10,4)")
        conn.commit()

        # Check if we have any history to analyze
@@ -1195,124 +1426,199 @@ def compute_accuracy(conn, run_id):
            log.info("No forecast history available for accuracy computation")
            return

-        # For each (pid, forecast_date) pair, keep only the most recent run's
-        # forecast row. This prevents double-counting when multiple runs have
-        # archived forecasts for the same product×date combination.
-        accuracy_cte = """
-        WITH ranked_history AS (
+        # Base CTEs (FORECAST_FIX_PLAN F7):
+        #  - Only score realized dates (forecast_date < CURRENT_DATE); future-lead
+        #    archives are excluded until their date passes.
+        #  - short_lead*: lead 0-6 deduped per (pid, forecast_date) — preserves the
+        #    meaning of the existing headline metrics. short_lead_eval keeps the
+        #    raw snapshot grid (incl. zero-zero days) for complete-week detection;
+        #    `accuracy` drops zero-zero days for daily-grain metrics.
+        #  - lead_dedup/lead_accuracy: deduped per (pid, forecast_date, lead_bucket)
+        #    so each long-lead bucket gets its own sample (the by_lead_time table).
+        base_cte = """
+        WITH ranked_all AS (
            SELECT
-                pfh.*,
+                pfh.pid, pfh.forecast_date, pfh.forecast_units, pfh.naive_units,
+                pfh.lifecycle_phase, pfh.forecast_method,
                fr.started_at,
-                ROW_NUMBER() OVER (
-                    PARTITION BY pfh.pid, pfh.forecast_date
-                    ORDER BY fr.started_at DESC
-                ) AS rn
+                (pfh.forecast_date - fr.started_at::date) AS lead_days,
+                CASE
+                    WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 0 AND 6 THEN '1-7d'
+                    WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 7 AND 13 THEN '8-14d'
+                    WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 14 AND 29 THEN '15-30d'
+                    WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 30 AND 59 THEN '31-60d'
+                    ELSE '61-90d'
+                END AS lead_bucket
            FROM product_forecasts_history pfh
            JOIN forecast_runs fr ON fr.id = pfh.run_id
+            WHERE pfh.forecast_date < CURRENT_DATE
+        ),
+        short_lead AS (
+            SELECT *,
+                ROW_NUMBER() OVER (
+                    PARTITION BY pid, forecast_date ORDER BY started_at DESC
+                ) AS rn
+            FROM ranked_all
+            WHERE lead_days BETWEEN 0 AND 6
+        ),
+        short_lead_eval AS (
+            SELECT sl.pid, sl.lifecycle_phase, sl.forecast_method, sl.forecast_date,
+                sl.forecast_units, sl.naive_units,
+                COALESCE(dps.units_sold, 0) AS actual_units,
+                (sl.forecast_units - COALESCE(dps.units_sold, 0)) AS error,
+                ABS(sl.forecast_units - COALESCE(dps.units_sold, 0)) AS abs_error
+            FROM short_lead sl
+            LEFT JOIN daily_product_snapshots dps
+                ON dps.pid = sl.pid AND dps.snapshot_date = sl.forecast_date
+            WHERE sl.rn = 1
        ),
        accuracy AS (
-            SELECT
-                rh.lifecycle_phase,
-                rh.forecast_method,
-                rh.forecast_date,
-                (rh.forecast_date - rh.started_at::date) AS lead_days,
-                rh.forecast_units,
+            SELECT * FROM short_lead_eval
+            WHERE NOT (forecast_units = 0 AND actual_units = 0)
+        ),
+        lead_dedup AS (
+            SELECT *,
+                ROW_NUMBER() OVER (
+                    PARTITION BY pid, forecast_date, lead_bucket ORDER BY started_at DESC
+                ) AS rn
+            FROM ranked_all
+        ),
+        lead_accuracy AS (
+            SELECT ld.lead_bucket, ld.forecast_units, ld.naive_units,
                COALESCE(dps.units_sold, 0) AS actual_units,
-                (rh.forecast_units - COALESCE(dps.units_sold, 0)) AS error,
-                ABS(rh.forecast_units - COALESCE(dps.units_sold, 0)) AS abs_error
-            FROM ranked_history rh
+                (ld.forecast_units - COALESCE(dps.units_sold, 0)) AS error,
+                ABS(ld.forecast_units - COALESCE(dps.units_sold, 0)) AS abs_error
+            FROM lead_dedup ld
            LEFT JOIN daily_product_snapshots dps
-                ON dps.pid = rh.pid AND dps.snapshot_date = rh.forecast_date
-            WHERE rh.rn = 1
-              AND NOT (rh.forecast_units = 0 AND COALESCE(dps.units_sold, 0) = 0)
+                ON dps.pid = ld.pid AND dps.snapshot_date = ld.forecast_date
+            WHERE ld.rn = 1
+              AND ld.lifecycle_phase != 'dormant'
+              AND NOT (ld.forecast_units = 0 AND COALESCE(dps.units_sold, 0) = 0)
        )
        """

-        # Compute and insert metrics for each dimension
-        dimensions = {
-            'overall': "SELECT 'all' AS dim",
-            'by_phase': "SELECT DISTINCT lifecycle_phase AS dim FROM accuracy",
-            'by_lead_time': """
-                SELECT DISTINCT
-                    CASE
-                        WHEN lead_days BETWEEN 0 AND 6 THEN '1-7d'
-                        WHEN lead_days BETWEEN 7 AND 13 THEN '8-14d'
-                        WHEN lead_days BETWEEN 14 AND 29 THEN '15-30d'
-                        WHEN lead_days BETWEEN 30 AND 59 THEN '31-60d'
-                        ELSE '61-90d'
-                    END AS dim
-                FROM accuracy
-            """,
-            'by_method': "SELECT DISTINCT forecast_method AS dim FROM accuracy",
-            'daily': "SELECT DISTINCT forecast_date::text AS dim FROM accuracy",
-        }
-
-        filter_clauses = {
-            'overall': "lifecycle_phase != 'dormant'",
-            'by_phase': "lifecycle_phase = dims.dim",
-            'by_lead_time': """
-                CASE
-                    WHEN lead_days BETWEEN 0 AND 6 THEN '1-7d'
-                    WHEN lead_days BETWEEN 7 AND 13 THEN '8-14d'
-                    WHEN lead_days BETWEEN 14 AND 29 THEN '15-30d'
-                    WHEN lead_days BETWEEN 30 AND 59 THEN '31-60d'
-                    ELSE '61-90d'
-                END = dims.dim
-            """,
-            'by_method': "forecast_method = dims.dim",
-            'daily': "forecast_date::text = dims.dim",
-        }
-
-        total_inserted = 0
-
-        for metric_type, dim_query in dimensions.items():
-            filter_clause = filter_clauses[metric_type]
-
-            sql = f"""
-            {accuracy_cte},
-            dims AS ({dim_query})
+        # Daily-grain aggregate over a source CTE aliased `a`, computing the
+        # engine WMAPE plus the naive-baseline WMAPE (NULL-safe: rows archived
+        # before F8 have naive_units NULL and are excluded from the naive sums).
+        def daily_agg(dim_expr, source, where=None, group_by=None):
+            where_sql = f"WHERE {where}" if where else ""
+            group_sql = f"GROUP BY {group_by}" if group_by else ""
+            return f"""
            SELECT
-                dims.dim,
+                {dim_expr} AS dim,
                COUNT(*) AS sample_size,
                COALESCE(SUM(a.actual_units), 0) AS total_actual,
                COALESCE(SUM(a.forecast_units), 0) AS total_forecast,
                AVG(a.abs_error) AS mae,
                CASE WHEN SUM(a.actual_units) > 0
-                    THEN SUM(a.abs_error) / SUM(a.actual_units)
-                    ELSE NULL END AS wmape,
+                    THEN SUM(a.abs_error) / SUM(a.actual_units) ELSE NULL END AS wmape,
                AVG(a.error) AS bias,
-                SQRT(AVG(POWER(a.error, 2))) AS rmse
-            FROM dims
-            CROSS JOIN accuracy a
-            WHERE {filter_clause}
-            GROUP BY dims.dim
+                SQRT(AVG(POWER(a.error, 2))) AS rmse,
+                CASE WHEN SUM(a.actual_units) FILTER (WHERE a.naive_units IS NOT NULL) > 0
+                    THEN SUM(ABS(a.naive_units - a.actual_units)) FILTER (WHERE a.naive_units IS NOT NULL)
+                         / SUM(a.actual_units) FILTER (WHERE a.naive_units IS NOT NULL)
+                    ELSE NULL END AS naive_wmape
+            FROM {source} a
+            {where_sql}
+            {group_sql}
            """

-            cur.execute(sql)
-            rows = cur.fetchall()
+        insert_sql = """
+            INSERT INTO forecast_accuracy
+                (run_id, metric_type, dimension_value, sample_size,
+                 total_actual_units, total_forecast_units, mae, wmape, bias, rmse,
+                 naive_wmape, fva)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+            ON CONFLICT (run_id, metric_type, dimension_value)
+            DO UPDATE SET
+                sample_size = EXCLUDED.sample_size,
+                total_actual_units = EXCLUDED.total_actual_units,
+                total_forecast_units = EXCLUDED.total_forecast_units,
+                mae = EXCLUDED.mae, wmape = EXCLUDED.wmape,
+                bias = EXCLUDED.bias, rmse = EXCLUDED.rmse,
+                naive_wmape = EXCLUDED.naive_wmape, fva = EXCLUDED.fva,
+                computed_at = NOW()
+        """

-            for row in rows:
-                dim_val, sample_size, total_actual, total_forecast, mae, wmape, bias, rmse = row
-                cur.execute("""
-                    INSERT INTO forecast_accuracy
-                        (run_id, metric_type, dimension_value, sample_size,
-                         total_actual_units, total_forecast_units, mae, wmape, bias, rmse)
-                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-                    ON CONFLICT (run_id, metric_type, dimension_value)
-                    DO UPDATE SET
-                        sample_size = EXCLUDED.sample_size,
-                        total_actual_units = EXCLUDED.total_actual_units,
-                        total_forecast_units = EXCLUDED.total_forecast_units,
-                        mae = EXCLUDED.mae, wmape = EXCLUDED.wmape,
-                        bias = EXCLUDED.bias, rmse = EXCLUDED.rmse,
-                        computed_at = NOW()
-                """, (run_id, metric_type, dim_val, sample_size,
-                      float(total_actual), float(total_forecast),
-                      float(mae) if mae is not None else None,
-                      float(wmape) if wmape is not None else None,
-                      float(bias) if bias is not None else None,
-                      float(rmse) if rmse is not None else None))
-                total_inserted += 1
+        def _f(x):  # float(x), but pass None through unchanged
+            return float(x) if x is not None else None
+
+        def run_and_insert(metric_type, sql):  # run base_cte + sql; upsert one row per result row
+            cur.execute(base_cte + sql)
+            n = 0
+            for row in cur.fetchall():  # results are fetched in full before cur is reused below
+                (dim_val, sample_size, total_actual, total_forecast,
+                 mae, wmape, bias, rmse, naive_wmape) = row
+                fva = None  # stays None unless both WMAPEs exist and naive_wmape > 0
+                if wmape is not None and naive_wmape is not None and float(naive_wmape) > 0:
+                    fva = 1.0 - float(wmape) / float(naive_wmape)
+                cur.execute(insert_sql, (
+                    run_id, metric_type, dim_val, sample_size,
+                    _f(total_actual), _f(total_forecast), _f(mae), _f(wmape),
+                    _f(bias), _f(rmse), _f(naive_wmape), _f(fva)))
+                n += 1
+            return n  # number of result rows passed to insert_sql
+
+        total_inserted = 0
+
+        # overall: two rows — 'all' (non-dormant, the headline) and
+        # 'all_incl_dormant' (everything, so the ~11% dormant demand stops being
+        # invisible). Both are short-lead (lead 0-6). F5.
+        overall_source = """(
+                SELECT a.*, 'all'::text AS dim FROM accuracy a WHERE a.lifecycle_phase != 'dormant'
+                UNION ALL
+                SELECT a.*, 'all_incl_dormant'::text AS dim FROM accuracy a
+            )"""
+        total_inserted += run_and_insert('overall',
+            daily_agg('a.dim', overall_source, group_by='a.dim'))
+
+        # by_phase / by_method / daily — short-lead daily-grain over `accuracy`.
+        total_inserted += run_and_insert('by_phase',
+            daily_agg('a.lifecycle_phase', 'accuracy', group_by='a.lifecycle_phase'))
+        total_inserted += run_and_insert('by_method',
+            daily_agg('a.forecast_method', 'accuracy', group_by='a.forecast_method'))
+        total_inserted += run_and_insert('daily',
+            daily_agg('a.forecast_date::text', 'accuracy',
+                      where="a.lifecycle_phase != 'dormant'", group_by='a.forecast_date'))
+
+        # by_lead_time — one sample per (pid, date, lead bucket) over `lead_accuracy`.
+        # Buckets beyond '1-7d' populate as the future-lead archives (F7) mature.
+        total_inserted += run_and_insert('by_lead_time',
+            daily_agg('a.lead_bucket', 'lead_accuracy', group_by='a.lead_bucket'))
+
+        # overall_weekly — the informative headline for intermittent retail demand.
+        # Aggregate the short-lead rows to (pid, complete week), then WMAPE over
+        # pid-weeks. Daily-grain WMAPE has a ~190% floor on this catalog; weekly
+        # grain is ~109% and responds to real improvement. F9.
+        weekly_sql = """,
+        weekly AS (
+            SELECT pid, date_trunc('week', forecast_date) AS wk,
+                SUM(forecast_units) AS fc_week,
+                SUM(actual_units) AS act_week,
+                SUM(naive_units) AS naive_week,
+                bool_and(naive_units IS NOT NULL) AS naive_complete
+            FROM short_lead_eval
+            WHERE lifecycle_phase != 'dormant'
+            GROUP BY pid, date_trunc('week', forecast_date)
+            HAVING COUNT(*) = 7
+        )
+        SELECT 'all'::text AS dim,
+            COUNT(*) AS sample_size,
+            COALESCE(SUM(act_week), 0) AS total_actual,
+            COALESCE(SUM(fc_week), 0) AS total_forecast,
+            AVG(ABS(fc_week - act_week)) AS mae,
+            CASE WHEN SUM(act_week) > 0
+                THEN SUM(ABS(fc_week - act_week)) / SUM(act_week) ELSE NULL END AS wmape,
+            AVG(fc_week - act_week) AS bias,
+            SQRT(AVG(POWER(fc_week - act_week, 2))) AS rmse,
+            CASE WHEN SUM(act_week) FILTER (WHERE naive_complete) > 0
+                THEN SUM(ABS(naive_week - act_week)) FILTER (WHERE naive_complete)
+                     / SUM(act_week) FILTER (WHERE naive_complete)
+                ELSE NULL END AS naive_wmape
+        FROM weekly
+        WHERE NOT (fc_week = 0 AND act_week = 0)
+        """
+        total_inserted += run_and_insert('overall_weekly', weekly_sql)

        conn.commit()

@@ -1562,6 +1868,10 @@ def main():
            conn, curves_df, dow_indices, monthly_indices, accuracy_margins
        )

+        # Phase 4b: Snapshot sampled future-lead forecasts (7/14/30/60/89d) from
+        # the fresh run so long-lead accuracy populates once those dates pass (F7).
+        archive_future_leads(conn, run_id)
+
        duration = time.time() - start_time

        # Record run completion (include DOW indices in metadata)
@@ -357,6 +357,9 @@ router.get('/forecast/metrics', async (req, res) => {

            const active = parseInt(totals.active_products) || 1;
            const curveProducts = parseInt(totals.curve_products) || 0;
+            // NOTE: despite the name, this is "share of active products forecast via
+            // lifecycle curves" (curve coverage), NOT a statistical confidence. It only
+            // feeds a per-day tooltip field. See FORECAST_FIX_PLAN F9 (point 4).
            const confidenceLevel = parseFloat((curveProducts / active).toFixed(2));

            // Daily series from actual forecast
@@ -687,14 +690,29 @@ router.get('/forecast/accuracy', async (req, res) => {
        const { rows: metrics } = await executeQuery(`
            SELECT metric_type, dimension_value, sample_size,
                total_actual_units, total_forecast_units,
-                mae, wmape, bias, rmse
+                mae, wmape, bias, rmse, naive_wmape, fva
            FROM forecast_accuracy
            WHERE run_id = $1
            ORDER BY metric_type, dimension_value
        `, [latestRunId]);

+        // Shared shaping for an "overall"-style aggregate row (daily or weekly grain); returns null when the row is missing.
+        const shapeOverall = (m) => m ? {
+            sampleSize: parseInt(m.sample_size),
+            totalActual: parseFloat(m.total_actual_units) || 0, // NaN (null/unparseable) falls back to 0
+            totalForecast: parseFloat(m.total_forecast_units) || 0, // NaN (null/unparseable) falls back to 0
+            mae: m.mae != null ? parseFloat(parseFloat(m.mae).toFixed(4)) : null, // rounded to 4 decimals; null/undefined -> null
+            wmape: m.wmape != null ? parseFloat((parseFloat(m.wmape) * 100).toFixed(1)) : null, // x100, rounded to 1 decimal
+            bias: m.bias != null ? parseFloat(parseFloat(m.bias).toFixed(4)) : null, // rounded to 4 decimals
+            rmse: m.rmse != null ? parseFloat(parseFloat(m.rmse).toFixed(4)) : null, // rounded to 4 decimals
+            naiveWmape: m.naive_wmape != null ? parseFloat((parseFloat(m.naive_wmape) * 100).toFixed(1)) : null, // same x100 / 1-decimal shaping as wmape
+            fva: m.fva != null ? parseFloat(parseFloat(m.fva).toFixed(3)) : null, // rounded to 3 decimals, not scaled
+        } : null;
+
        // Organize into response structure
-        const overall = metrics.find(m => m.metric_type === 'overall');
+        const overall = metrics.find(m => m.metric_type === 'overall' && m.dimension_value === 'all')
+        const overallInclDormant = metrics.find(m => m.metric_type === 'overall' && m.dimension_value === 'all_incl_dormant')
+        const overallWeekly = metrics.find(m => m.metric_type === 'overall_weekly');
        const byPhase = metrics
            .filter(m => m.metric_type === 'by_phase')
            .map(m => ({
@@ -706,6 +724,8 @@ router.get('/forecast/accuracy', async (req, res) => {
                wmape: m.wmape != null ? parseFloat((parseFloat(m.wmape) * 100).toFixed(1)) : null,
                bias: m.bias != null ? parseFloat(parseFloat(m.bias).toFixed(4)) : null,
                rmse: m.rmse != null ? parseFloat(parseFloat(m.rmse).toFixed(4)) : null,
+                naiveWmape: m.naive_wmape != null ? parseFloat((parseFloat(m.naive_wmape) * 100).toFixed(1)) : null,
+                fva: m.fva != null ? parseFloat(parseFloat(m.fva).toFixed(3)) : null,
            }))
            .sort((a, b) => (b.totalActual || 0) - (a.totalActual || 0));

@@ -763,6 +783,26 @@ router.get('/forecast/accuracy', async (req, res) => {
            sampleSize: parseInt(r.sample_size),
        }));

+        // Weekly-grain trend across runs (starts empty for old runs that predate
+        // the overall_weekly metric — that's expected, no backfill). F9.
+        const { rows: weeklyTrendRows } = await executeQuery(`
+            SELECT fr.finished_at::date AS run_date,
+                fa.wmape, fa.naive_wmape, fa.fva, fa.sample_size
+            FROM forecast_accuracy fa
+            JOIN forecast_runs fr ON fr.id = fa.run_id
+            WHERE fa.metric_type = 'overall_weekly'
+              AND fa.dimension_value = 'all'
+            ORDER BY fr.finished_at
+        `);
+
+        const accuracyTrendWeekly = weeklyTrendRows.map(r => ({ // one trend point per row returned by the weekly trend query
+            date: r.run_date instanceof Date ? r.run_date.toISOString().split('T')[0] : r.run_date, // NOTE(review): toISOString() is UTC — confirm a locally-parsed DATE cannot shift by a day
+            wmape: r.wmape != null ? parseFloat((parseFloat(r.wmape) * 100).toFixed(1)) : null, // x100, rounded to 1 decimal
+            naiveWmape: r.naive_wmape != null ? parseFloat((parseFloat(r.naive_wmape) * 100).toFixed(1)) : null, // x100, rounded to 1 decimal
+            fva: r.fva != null ? parseFloat(parseFloat(r.fva).toFixed(3)) : null, // rounded to 3 decimals, not scaled
+            sampleSize: parseInt(r.sample_size),
+        }));
+
        res.json({
            hasData: true,
            computedAt,
@@ -775,20 +815,15 @@ router.get('/forecast/accuracy', async (req, res) => {
                    ? historyInfo.latest_date.toISOString().split('T')[0]
                    : historyInfo.latest_date,
            },
-            overall: overall ? {
-                sampleSize: parseInt(overall.sample_size),
-                totalActual: parseFloat(overall.total_actual_units) || 0,
-                totalForecast: parseFloat(overall.total_forecast_units) || 0,
-                mae: overall.mae != null ? parseFloat(parseFloat(overall.mae).toFixed(4)) : null,
-                wmape: overall.wmape != null ? parseFloat((parseFloat(overall.wmape) * 100).toFixed(1)) : null,
-                bias: overall.bias != null ? parseFloat(parseFloat(overall.bias).toFixed(4)) : null,
-                rmse: overall.rmse != null ? parseFloat(parseFloat(overall.rmse).toFixed(4)) : null,
-            } : null,
+            overall: shapeOverall(overall),
+            overallInclDormant: shapeOverall(overallInclDormant),
+            overallWeekly: shapeOverall(overallWeekly),
            byPhase,
            byLeadTime,
            byMethod,
            dailyTrend,
            accuracyTrend,
+            accuracyTrendWeekly,
        });
    } catch (err) {
        console.error('Error fetching forecast accuracy:', err);