Forecast improvements

This commit is contained in:
2026-06-11 14:55:33 -04:00
parent 9ff744399f
commit 3b2f51e6b8
5 changed files with 887 additions and 138 deletions
@@ -634,6 +634,52 @@ def forecast_from_curve(curve_params, scale_factor, age_days, horizon_days):
return np.array(forecasts)
def forecast_preorder(curve_params, scale_factor, days_until_arrival,
                      preorder_daily_rate, horizon_days):
    """
    Two-segment daily forecast for a product that has not arrived yet.

    Days before the expected arrival carry a flat pre-order trickle; from the
    arrival day onward the scaled launch curve is used, starting at curve
    age 0. Running the launch curve from today instead would place launch
    volume on days when the product is not yet in stock
    (see FORECAST_FIX_PLAN F4).

    Args:
        curve_params: sequence whose first three items are
            (amplitude, decay_rate, baseline); extra items are ignored here
            but the full sequence is passed on to forecast_from_curve.
        scale_factor: per-product multiplier applied to the launch curve.
        days_until_arrival: days from today to expected arrival; clamped to
            [0, horizon_days] and truncated to an int.
        preorder_daily_rate: observed pre-order units per day.
        horizon_days: number of daily values to produce.

    Returns:
        numpy array of length horizon_days with the daily forecast.
    """
    amplitude, _decay_rate, baseline = curve_params[:3]
    daily = np.zeros(horizon_days)

    # Arrival offset, kept inside the forecast window.
    arrival_offset = int(max(0, min(days_until_arrival, horizon_days)))

    # Before arrival: flat trickle, never above the curve's week-0 daily value
    # (when that value is positive) and never below zero.
    if arrival_offset > 0:
        launch_day_level = (amplitude / 7.0) * scale_factor + (baseline / 7.0)
        trickle = (
            min(preorder_daily_rate, launch_day_level)
            if launch_day_level > 0
            else preorder_daily_rate
        )
        daily[:arrival_offset] = max(0.0, trickle)

    # From arrival onward: the launch curve, with its day 0 on the arrival date.
    if arrival_offset < horizon_days:
        daily[arrival_offset:] = forecast_from_curve(
            curve_params, scale_factor, 0, horizon_days - arrival_offset)

    return daily
# ---------------------------------------------------------------------------
# Batch data loading (eliminates N+1 per-product queries)
# ---------------------------------------------------------------------------
@@ -651,9 +697,11 @@ def batch_load_product_data(conn, products):
data = {
'preorder_sales': {},
'preorder_days': {},
'preorder_arrival_days': {},
'launch_sales': {},
'decay_velocity': {},
'mature_history': {},
'dormant_rate': {},
}
# Pre-order sales: orders placed BEFORE first received date
@@ -677,6 +725,39 @@ def batch_load_product_data(conn, products):
data['preorder_days'][int(row['pid'])] = float(row['preorder_days'])
log.info(f"Batch loaded pre-order sales for {len(data['preorder_sales'])}/{len(preorder_pids)} preorder products")
# Expected arrival per pre-order product, to time the launch curve.
# Prefer the soonest FUTURE expected_date on an open PO; if open POs exist but
# none has a future expected_date (past or missing) assume 7 days; if there's
# no open PO at all assume 14 days. See FORECAST_FIX_PLAN F4.
arrival_sql = """
SELECT pid,
MIN(expected_date) FILTER (
WHERE expected_date IS NOT NULL AND expected_date >= CURRENT_DATE
) AS future_arrival
FROM purchase_orders
WHERE pid = ANY(%s)
AND status IN ('created', 'ordered', 'electronically_sent', 'receiving_started')
GROUP BY pid
"""
adf = execute_query(conn, arrival_sql, [preorder_pids])
today = date.today()
for _, row in adf.iterrows():
pid = int(row['pid'])
fa = row['future_arrival']
if pd.notna(fa):
fa_date = pd.Timestamp(fa).date()
data['preorder_arrival_days'][pid] = max(0, (fa_date - today).days)
else:
data['preorder_arrival_days'][pid] = 7 # open PO, expected_date already past
no_po = 0
for pid in preorder_pids:
if int(pid) not in data['preorder_arrival_days']:
data['preorder_arrival_days'][int(pid)] = 14 # no open PO at all
no_po += 1
log.info(f"Batch loaded preorder arrival for "
f"{len(data['preorder_arrival_days']) - no_po}/{len(preorder_pids)} via open POs, "
f"{no_po} defaulted to 14d")
# Launch sales: first 14 days after first received
launch_pids = products[products['phase'] == 'launch']['pid'].tolist()
if launch_pids:
@@ -694,15 +775,23 @@ def batch_load_product_data(conn, products):
data['launch_sales'][int(row['pid'])] = float(row['total_sold'])
log.info(f"Batch loaded launch sales for {len(data['launch_sales'])}/{len(launch_pids)} launch products")
# Decay recent velocity: average daily sales over last 30 days
# Decay recent velocity: TRUE calendar-daily average over the last 30 days.
# We divide the summed units by calendar days (clipped to the product's age),
# NOT by the number of snapshot rows. Snapshots are sparse and mostly land on
# sold-days, so AVG(units_sold) averages over sold-days only and inflated the
# decay rate ~4x (measured 1.353 vs true 0.332 units/day). See FORECAST_FIX_PLAN F1.
decay_pids = products[products['phase'] == 'decay']['pid'].tolist()
if decay_pids:
sql = """
SELECT dps.pid, AVG(COALESCE(dps.units_sold, 0)) AS avg_daily
SELECT dps.pid,
SUM(COALESCE(dps.units_sold, 0))::float
/ GREATEST(LEAST(30, (CURRENT_DATE - pm.date_first_received::date)), 1) AS avg_daily
FROM daily_product_snapshots dps
JOIN product_metrics pm ON pm.pid = dps.pid
WHERE dps.pid = ANY(%s)
AND dps.snapshot_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY dps.pid
AND dps.snapshot_date >= pm.date_first_received::date
GROUP BY dps.pid, pm.date_first_received
"""
df = execute_query(conn, sql, [decay_pids])
for _, row in df.iterrows():
@@ -724,6 +813,25 @@ def batch_load_product_data(conn, products):
data['mature_history'][int(pid)] = group.copy()
log.info(f"Batch loaded history for {len(data['mature_history'])}/{len(mature_pids)} mature products")
# Dormant trailing order rate: dormant products forecast 0 by default, but
# ~11K of them still sell (restocks, promos, long-tail) — ~11% of all demand
# currently forecast as a hard zero. Load a trailing-180-day daily order rate
# so the dormant branch can carry a small positive rate. See FORECAST_FIX_PLAN F5.
dormant_pids = products[products['phase'] == 'dormant']['pid'].tolist()
if dormant_pids:
sql = """
SELECT o.pid, SUM(o.quantity) / 180.0 AS rate
FROM orders o
WHERE o.pid = ANY(%s)
AND o.canceled IS DISTINCT FROM TRUE
AND o.date >= CURRENT_DATE - INTERVAL '180 days'
GROUP BY o.pid
"""
df = execute_query(conn, sql, [dormant_pids])
for _, row in df.iterrows():
data['dormant_rate'][int(row['pid'])] = float(row['rate'])
log.info(f"Batch loaded dormant order rate for {len(data['dormant_rate'])}/{len(dormant_pids)} dormant products")
return data
@@ -829,11 +937,20 @@ def forecast_mature(product, history_df):
# Not enough data — flat velocity
return np.full(FORECAST_HORIZON_DAYS, velocity)
# Fill date gaps with 0 sales (days where product had no snapshot = no sales)
# Reindex over the FULL calendar window ending yesterday, not just the span
# between the first and last snapshot. resample() only covers first→last
# snapshot, so leading/trailing quiet periods are absent and the Holt level
# is fitted only on the product's busy span (can run ~4x too high). An
# explicit reindex fills every quiet calendar day with 0. (pid, snapshot_date)
# is unique so there is no duplicate-index risk; do NOT use combine_first
# (it keeps zeros over real data). See FORECAST_FIX_PLAN F2.
hist = history_df.copy()
hist['snapshot_date'] = pd.to_datetime(hist['snapshot_date'])
hist = hist.set_index('snapshot_date').resample('D').sum().fillna(0)
series = hist['units_sold'].values.astype(float)
hist = hist.set_index('snapshot_date')['units_sold']
full_index = pd.date_range(
end=pd.Timestamp(date.today() - timedelta(days=1)),
periods=EXP_SMOOTHING_WINDOW, freq='D')
series = hist.reindex(full_index, fill_value=0.0).values.astype(float)
# Need at least 2 non-zero values for smoothing
if np.count_nonzero(series) < 2:
@@ -956,9 +1073,24 @@ def generate_all_forecasts(conn, curves_df, dow_indices, monthly_indices=None,
today = date.today()
forecast_dates = [today + timedelta(days=i) for i in range(FORECAST_HORIZON_DAYS)]
# Pre-compute DOW and seasonal multipliers for each forecast date
# Pre-compute DOW and seasonal multipliers for each forecast date.
# DOW multipliers stay ABSOLUTE — every calibration is a multi-week average
# and therefore DOW-neutral, so reshaping by absolute DOW indices is correct.
# Seasonal indices must be applied RELATIVE to the calibration period:
# each per-product calibration (decay velocity, mature Holt level, launch /
# preorder scale) is fitted on raw recent actuals that already embed the
# current month's seasonal level. Multiplying by the absolute target-month
# index double-counts seasonality (~25% over-forecast at the May→June sale
# transition, worse near November). Divide by the trailing-30-day average
# index so only the seasonal *change* from calibration to target applies.
# See FORECAST_FIX_PLAN F3.
dow_multipliers = [dow_indices.get(d.isoweekday(), 1.0) for d in forecast_dates]
seasonal_multipliers = [monthly_indices.get(d.month, 1.0) for d in forecast_dates]
trailing = [today - timedelta(days=i) for i in range(1, 31)]
calibration_index = float(np.mean([monthly_indices.get(d.month, 1.0) for d in trailing]))
seasonal_multipliers = [
monthly_indices.get(d.month, 1.0) / max(calibration_index, 0.1)
for d in forecast_dates
]
# TRUNCATE before streaming writes
with conn.cursor() as cur:
@@ -1002,9 +1134,33 @@ def generate_all_forecasts(conn, curves_df, dow_indices, monthly_indices=None,
try:
curve_info = get_curve_for_product(product, curves_df)
if phase in ('preorder', 'launch'):
if phase == 'preorder':
if curve_info:
scale = compute_scale_factor(phase, product, curve_info, batch_data)
scale = compute_scale_factor('preorder', product, curve_info, batch_data)
# Time the launch curve to expected arrival instead of
# running it from today (F4). Pre-arrival days carry the
# observed pre-order trickle rate.
days_until_arrival = batch_data['preorder_arrival_days'].get(pid, 14)
preorder_units = batch_data['preorder_sales'].get(pid, 0)
preorder_days = batch_data['preorder_days'].get(pid, 1)
preorder_daily_rate = preorder_units / max(preorder_days, 1)
forecasts = forecast_preorder(
curve_info, scale, days_until_arrival,
preorder_daily_rate, FORECAST_HORIZON_DAYS)
method = 'lifecycle_curve'
else:
# No reliable curve — fall back to velocity if available
velocity = product.get('sales_velocity_daily') or 0
if velocity > 0:
forecasts = np.full(FORECAST_HORIZON_DAYS, velocity)
method = 'velocity'
else:
forecasts = forecast_dormant()
method = 'zero'
elif phase == 'launch':
if curve_info:
scale = compute_scale_factor('launch', product, curve_info, batch_data)
forecasts = forecast_from_curve(curve_info, scale, age, FORECAST_HORIZON_DAYS)
method = 'lifecycle_curve'
else:
@@ -1038,8 +1194,16 @@ def generate_all_forecasts(conn, curves_df, dow_indices, monthly_indices=None,
method = 'velocity'
else: # dormant
forecasts = forecast_dormant()
method = 'zero'
# Carry a small positive rate for dormant products that still
# trickle sales (restocks/promos/long-tail); only truly dead
# products stay at zero. See FORECAST_FIX_PLAN F5.
rate = batch_data['dormant_rate'].get(pid, 0)
if rate > 0:
forecasts = np.full(FORECAST_HORIZON_DAYS, rate)
method = 'velocity'
else:
forecasts = forecast_dormant()
method = 'zero'
# Confidence interval: use accuracy-calibrated margins per phase
base_margin = accuracy_margins.get(phase, 0.5)
@@ -1108,6 +1272,8 @@ def archive_forecasts(conn, run_id):
""")
cur.execute("CREATE INDEX IF NOT EXISTS idx_pfh_date ON product_forecasts_history(forecast_date)")
cur.execute("CREATE INDEX IF NOT EXISTS idx_pfh_pid_date ON product_forecasts_history(pid, forecast_date)")
# Naive-baseline column for forecast value-added (FVA). See FORECAST_FIX_PLAN F8.
cur.execute("ALTER TABLE product_forecasts_history ADD COLUMN IF NOT EXISTS naive_units NUMERIC(10,2)")
# Find the previous completed run (whose forecasts are still in product_forecasts)
cur.execute("""
@@ -1124,15 +1290,27 @@ def archive_forecasts(conn, run_id):
prev_run_id = prev_run[0]
# Archive only past-date forecasts (where actuals now exist)
# Archive only past-date forecasts (where actuals now exist). Attach the
# naive baseline (flat trailing-28-day daily average) at the same time so
# forecast value-added can be measured. See FORECAST_FIX_PLAN F8.
cur.execute("""
INSERT INTO product_forecasts_history
(run_id, pid, forecast_date, forecast_units, forecast_revenue,
lifecycle_phase, forecast_method, confidence_lower, confidence_upper, generated_at)
SELECT %s, pid, forecast_date, forecast_units, forecast_revenue,
lifecycle_phase, forecast_method, confidence_lower, confidence_upper, generated_at
FROM product_forecasts
WHERE forecast_date < CURRENT_DATE
lifecycle_phase, forecast_method, confidence_lower, confidence_upper,
generated_at, naive_units)
SELECT %s, pf.pid, pf.forecast_date, pf.forecast_units, pf.forecast_revenue,
pf.lifecycle_phase, pf.forecast_method, pf.confidence_lower, pf.confidence_upper,
pf.generated_at, COALESCE(nv.naive_daily, 0)
FROM product_forecasts pf
LEFT JOIN (
SELECT o.pid, SUM(o.quantity) / 28.0 AS naive_daily
FROM orders o
WHERE o.canceled IS DISTINCT FROM TRUE
AND o.date >= CURRENT_DATE - INTERVAL '28 days'
AND o.date < CURRENT_DATE
GROUP BY o.pid
) nv ON nv.pid = pf.pid
WHERE pf.forecast_date < CURRENT_DATE
ON CONFLICT (run_id, pid, forecast_date) DO NOTHING
""", (prev_run_id,))
@@ -1154,6 +1332,48 @@ def archive_forecasts(conn, run_id):
return archived
def archive_future_leads(conn, run_id):
    """
    Copy a sample of future-dated rows from product_forecasts into
    product_forecasts_history, tagged with ``run_id``.

    Only non-dormant rows whose forecast_date is exactly 7, 14, 30, 60 or 89
    days after CURRENT_DATE are copied. Each row also gets ``naive_units``:
    the product's non-canceled order quantity over the previous 28 days
    divided by 28, or 0 when it has no such orders. Rows that already exist
    for (run_id, pid, forecast_date) are left untouched.

    Rationale carried over from the original notes (FORECAST_FIX_PLAN F7):
    the past-date archive only captures the day that just elapsed, so without
    this snapshot the longer lead-time forecasts are never scored.

    Args:
        conn: open database connection; committed by this function.
        run_id: id of the forecast run the archived rows are attributed to.

    Returns:
        The cursor's rowcount for the INSERT statement.
    """
    insert_sql = """
        INSERT INTO product_forecasts_history
        (run_id, pid, forecast_date, forecast_units, forecast_revenue,
        lifecycle_phase, forecast_method, confidence_lower, confidence_upper,
        generated_at, naive_units)
        SELECT %s, pf.pid, pf.forecast_date, pf.forecast_units, pf.forecast_revenue,
        pf.lifecycle_phase, pf.forecast_method, pf.confidence_lower, pf.confidence_upper,
        pf.generated_at, COALESCE(nv.naive_daily, 0)
        FROM product_forecasts pf
        LEFT JOIN (
        SELECT o.pid, SUM(o.quantity) / 28.0 AS naive_daily
        FROM orders o
        WHERE o.canceled IS DISTINCT FROM TRUE
        AND o.date >= CURRENT_DATE - INTERVAL '28 days'
        AND o.date < CURRENT_DATE
        GROUP BY o.pid
        ) nv ON nv.pid = pf.pid
        WHERE pf.lifecycle_phase != 'dormant'
        AND pf.forecast_date - CURRENT_DATE IN (7, 14, 30, 60, 89)
        ON CONFLICT (run_id, pid, forecast_date) DO NOTHING
    """
    with conn.cursor() as cur:
        cur.execute(insert_sql, (run_id,))
        # Capture the count while the cursor is still open.
        archived = cur.rowcount
    conn.commit()
    log.info(f"Archived {archived} future-lead forecast rows (7/14/30/60/89d) for run {run_id}")
    return archived
def compute_accuracy(conn, run_id):
"""
Compute forecast accuracy metrics from archived history vs. actual sales.
@@ -1162,11 +1382,18 @@ def compute_accuracy(conn, run_id):
(pid, forecast_date = snapshot_date) to compare forecasted vs. actual units.
Stores results in forecast_accuracy table, broken down by:
- overall: single aggregate row
- overall: two rows — 'all' (non-dormant) and 'all_incl_dormant' (F5)
- overall_weekly: per-product weekly-grain WMAPE — the informative headline
for intermittent demand (daily grain has a ~190% floor) (F9)
- by_phase: per lifecycle phase
- by_lead_time: bucketed by how far ahead the forecast was
- by_lead_time: bucketed by how far ahead the forecast was — long-lead
buckets populate as the future-lead archives mature (F7)
- by_method: per forecast method
- daily: per forecast_date (for trend charts)
Every dimension also stores naive_wmape (flat trailing-28d baseline) and
fva = 1 - wmape/naive_wmape, so the engine can be judged as value-over-naive
(F8). Only realized dates (forecast_date < CURRENT_DATE) are scored.
"""
with conn.cursor() as cur:
# Ensure accuracy table exists
@@ -1186,6 +1413,10 @@ def compute_accuracy(conn, run_id):
PRIMARY KEY (run_id, metric_type, dimension_value)
)
""")
# Naive-baseline WMAPE and forecast value-added (FVA = 1 - wmape/naive_wmape).
# See FORECAST_FIX_PLAN F8.
cur.execute("ALTER TABLE forecast_accuracy ADD COLUMN IF NOT EXISTS naive_wmape NUMERIC(10,4)")
cur.execute("ALTER TABLE forecast_accuracy ADD COLUMN IF NOT EXISTS fva NUMERIC(10,4)")
conn.commit()
# Check if we have any history to analyze
@@ -1195,124 +1426,199 @@ def compute_accuracy(conn, run_id):
log.info("No forecast history available for accuracy computation")
return
# For each (pid, forecast_date) pair, keep only the most recent run's
# forecast row. This prevents double-counting when multiple runs have
# archived forecasts for the same product×date combination.
accuracy_cte = """
WITH ranked_history AS (
# Base CTEs (FORECAST_FIX_PLAN F7):
# - Only score realized dates (forecast_date < CURRENT_DATE); future-lead
# archives are excluded until their date passes.
# - short_lead*: lead 0-6 deduped per (pid, forecast_date) — preserves the
# meaning of the existing headline metrics. short_lead_eval keeps the
# raw snapshot grid (incl. zero-zero days) for complete-week detection;
# `accuracy` drops zero-zero days for daily-grain metrics.
# - lead_dedup/lead_accuracy: deduped per (pid, forecast_date, lead_bucket)
# so each long-lead bucket gets its own sample (the by_lead_time table).
base_cte = """
WITH ranked_all AS (
SELECT
pfh.*,
pfh.pid, pfh.forecast_date, pfh.forecast_units, pfh.naive_units,
pfh.lifecycle_phase, pfh.forecast_method,
fr.started_at,
ROW_NUMBER() OVER (
PARTITION BY pfh.pid, pfh.forecast_date
ORDER BY fr.started_at DESC
) AS rn
(pfh.forecast_date - fr.started_at::date) AS lead_days,
CASE
WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 0 AND 6 THEN '1-7d'
WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 7 AND 13 THEN '8-14d'
WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 14 AND 29 THEN '15-30d'
WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 30 AND 59 THEN '31-60d'
ELSE '61-90d'
END AS lead_bucket
FROM product_forecasts_history pfh
JOIN forecast_runs fr ON fr.id = pfh.run_id
WHERE pfh.forecast_date < CURRENT_DATE
),
short_lead AS (
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY pid, forecast_date ORDER BY started_at DESC
) AS rn
FROM ranked_all
WHERE lead_days BETWEEN 0 AND 6
),
short_lead_eval AS (
SELECT sl.pid, sl.lifecycle_phase, sl.forecast_method, sl.forecast_date,
sl.forecast_units, sl.naive_units,
COALESCE(dps.units_sold, 0) AS actual_units,
(sl.forecast_units - COALESCE(dps.units_sold, 0)) AS error,
ABS(sl.forecast_units - COALESCE(dps.units_sold, 0)) AS abs_error
FROM short_lead sl
LEFT JOIN daily_product_snapshots dps
ON dps.pid = sl.pid AND dps.snapshot_date = sl.forecast_date
WHERE sl.rn = 1
),
accuracy AS (
SELECT
rh.lifecycle_phase,
rh.forecast_method,
rh.forecast_date,
(rh.forecast_date - rh.started_at::date) AS lead_days,
rh.forecast_units,
SELECT * FROM short_lead_eval
WHERE NOT (forecast_units = 0 AND actual_units = 0)
),
lead_dedup AS (
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY pid, forecast_date, lead_bucket ORDER BY started_at DESC
) AS rn
FROM ranked_all
),
lead_accuracy AS (
SELECT ld.lead_bucket, ld.forecast_units, ld.naive_units,
COALESCE(dps.units_sold, 0) AS actual_units,
(rh.forecast_units - COALESCE(dps.units_sold, 0)) AS error,
ABS(rh.forecast_units - COALESCE(dps.units_sold, 0)) AS abs_error
FROM ranked_history rh
(ld.forecast_units - COALESCE(dps.units_sold, 0)) AS error,
ABS(ld.forecast_units - COALESCE(dps.units_sold, 0)) AS abs_error
FROM lead_dedup ld
LEFT JOIN daily_product_snapshots dps
ON dps.pid = rh.pid AND dps.snapshot_date = rh.forecast_date
WHERE rh.rn = 1
AND NOT (rh.forecast_units = 0 AND COALESCE(dps.units_sold, 0) = 0)
ON dps.pid = ld.pid AND dps.snapshot_date = ld.forecast_date
WHERE ld.rn = 1
AND ld.lifecycle_phase != 'dormant'
AND NOT (ld.forecast_units = 0 AND COALESCE(dps.units_sold, 0) = 0)
)
"""
# Compute and insert metrics for each dimension
dimensions = {
'overall': "SELECT 'all' AS dim",
'by_phase': "SELECT DISTINCT lifecycle_phase AS dim FROM accuracy",
'by_lead_time': """
SELECT DISTINCT
CASE
WHEN lead_days BETWEEN 0 AND 6 THEN '1-7d'
WHEN lead_days BETWEEN 7 AND 13 THEN '8-14d'
WHEN lead_days BETWEEN 14 AND 29 THEN '15-30d'
WHEN lead_days BETWEEN 30 AND 59 THEN '31-60d'
ELSE '61-90d'
END AS dim
FROM accuracy
""",
'by_method': "SELECT DISTINCT forecast_method AS dim FROM accuracy",
'daily': "SELECT DISTINCT forecast_date::text AS dim FROM accuracy",
}
filter_clauses = {
'overall': "lifecycle_phase != 'dormant'",
'by_phase': "lifecycle_phase = dims.dim",
'by_lead_time': """
CASE
WHEN lead_days BETWEEN 0 AND 6 THEN '1-7d'
WHEN lead_days BETWEEN 7 AND 13 THEN '8-14d'
WHEN lead_days BETWEEN 14 AND 29 THEN '15-30d'
WHEN lead_days BETWEEN 30 AND 59 THEN '31-60d'
ELSE '61-90d'
END = dims.dim
""",
'by_method': "forecast_method = dims.dim",
'daily': "forecast_date::text = dims.dim",
}
total_inserted = 0
for metric_type, dim_query in dimensions.items():
filter_clause = filter_clauses[metric_type]
sql = f"""
{accuracy_cte},
dims AS ({dim_query})
# Daily-grain aggregate over a source CTE aliased `a`, computing the
# engine WMAPE plus the naive-baseline WMAPE (NULL-safe: rows archived
# before F8 have naive_units NULL and are excluded from the naive sums).
def daily_agg(dim_expr, source, where=None, group_by=None):
where_sql = f"WHERE {where}" if where else ""
group_sql = f"GROUP BY {group_by}" if group_by else ""
return f"""
SELECT
dims.dim,
{dim_expr} AS dim,
COUNT(*) AS sample_size,
COALESCE(SUM(a.actual_units), 0) AS total_actual,
COALESCE(SUM(a.forecast_units), 0) AS total_forecast,
AVG(a.abs_error) AS mae,
CASE WHEN SUM(a.actual_units) > 0
THEN SUM(a.abs_error) / SUM(a.actual_units)
ELSE NULL END AS wmape,
THEN SUM(a.abs_error) / SUM(a.actual_units) ELSE NULL END AS wmape,
AVG(a.error) AS bias,
SQRT(AVG(POWER(a.error, 2))) AS rmse
FROM dims
CROSS JOIN accuracy a
WHERE {filter_clause}
GROUP BY dims.dim
SQRT(AVG(POWER(a.error, 2))) AS rmse,
CASE WHEN SUM(a.actual_units) FILTER (WHERE a.naive_units IS NOT NULL) > 0
THEN SUM(ABS(a.naive_units - a.actual_units)) FILTER (WHERE a.naive_units IS NOT NULL)
/ SUM(a.actual_units) FILTER (WHERE a.naive_units IS NOT NULL)
ELSE NULL END AS naive_wmape
FROM {source} a
{where_sql}
{group_sql}
"""
cur.execute(sql)
rows = cur.fetchall()
insert_sql = """
INSERT INTO forecast_accuracy
(run_id, metric_type, dimension_value, sample_size,
total_actual_units, total_forecast_units, mae, wmape, bias, rmse,
naive_wmape, fva)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (run_id, metric_type, dimension_value)
DO UPDATE SET
sample_size = EXCLUDED.sample_size,
total_actual_units = EXCLUDED.total_actual_units,
total_forecast_units = EXCLUDED.total_forecast_units,
mae = EXCLUDED.mae, wmape = EXCLUDED.wmape,
bias = EXCLUDED.bias, rmse = EXCLUDED.rmse,
naive_wmape = EXCLUDED.naive_wmape, fva = EXCLUDED.fva,
computed_at = NOW()
"""
for row in rows:
dim_val, sample_size, total_actual, total_forecast, mae, wmape, bias, rmse = row
cur.execute("""
INSERT INTO forecast_accuracy
(run_id, metric_type, dimension_value, sample_size,
total_actual_units, total_forecast_units, mae, wmape, bias, rmse)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (run_id, metric_type, dimension_value)
DO UPDATE SET
sample_size = EXCLUDED.sample_size,
total_actual_units = EXCLUDED.total_actual_units,
total_forecast_units = EXCLUDED.total_forecast_units,
mae = EXCLUDED.mae, wmape = EXCLUDED.wmape,
bias = EXCLUDED.bias, rmse = EXCLUDED.rmse,
computed_at = NOW()
""", (run_id, metric_type, dim_val, sample_size,
float(total_actual), float(total_forecast),
float(mae) if mae is not None else None,
float(wmape) if wmape is not None else None,
float(bias) if bias is not None else None,
float(rmse) if rmse is not None else None))
total_inserted += 1
def _f(x):
return float(x) if x is not None else None
def run_and_insert(metric_type, sql):
cur.execute(base_cte + sql)
n = 0
for row in cur.fetchall():
(dim_val, sample_size, total_actual, total_forecast,
mae, wmape, bias, rmse, naive_wmape) = row
fva = None
if wmape is not None and naive_wmape is not None and float(naive_wmape) > 0:
fva = 1.0 - float(wmape) / float(naive_wmape)
cur.execute(insert_sql, (
run_id, metric_type, dim_val, sample_size,
_f(total_actual), _f(total_forecast), _f(mae), _f(wmape),
_f(bias), _f(rmse), _f(naive_wmape), _f(fva)))
n += 1
return n
total_inserted = 0
# overall: two rows — 'all' (non-dormant, the headline) and
# 'all_incl_dormant' (everything, so the ~11% dormant demand stops being
# invisible). Both are short-lead (lead 0-6). F5.
overall_source = """(
SELECT a.*, 'all'::text AS dim FROM accuracy a WHERE a.lifecycle_phase != 'dormant'
UNION ALL
SELECT a.*, 'all_incl_dormant'::text AS dim FROM accuracy a
)"""
total_inserted += run_and_insert('overall',
daily_agg('a.dim', overall_source, group_by='a.dim'))
# by_phase / by_method / daily — short-lead daily-grain over `accuracy`.
total_inserted += run_and_insert('by_phase',
daily_agg('a.lifecycle_phase', 'accuracy', group_by='a.lifecycle_phase'))
total_inserted += run_and_insert('by_method',
daily_agg('a.forecast_method', 'accuracy', group_by='a.forecast_method'))
total_inserted += run_and_insert('daily',
daily_agg('a.forecast_date::text', 'accuracy',
where="a.lifecycle_phase != 'dormant'", group_by='a.forecast_date'))
# by_lead_time — one sample per (pid, date, lead bucket) over `lead_accuracy`.
# Buckets beyond '1-7d' populate as the future-lead archives (F7) mature.
total_inserted += run_and_insert('by_lead_time',
daily_agg('a.lead_bucket', 'lead_accuracy', group_by='a.lead_bucket'))
# overall_weekly — the informative headline for intermittent retail demand.
# Aggregate the short-lead rows to (pid, complete week), then WMAPE over
# pid-weeks. Daily-grain WMAPE has a ~190% floor on this catalog; weekly
# grain is ~109% and responds to real improvement. F9.
weekly_sql = """,
weekly AS (
SELECT pid, date_trunc('week', forecast_date) AS wk,
SUM(forecast_units) AS fc_week,
SUM(actual_units) AS act_week,
SUM(naive_units) AS naive_week,
bool_and(naive_units IS NOT NULL) AS naive_complete
FROM short_lead_eval
WHERE lifecycle_phase != 'dormant'
GROUP BY pid, date_trunc('week', forecast_date)
HAVING COUNT(*) = 7
)
SELECT 'all'::text AS dim,
COUNT(*) AS sample_size,
COALESCE(SUM(act_week), 0) AS total_actual,
COALESCE(SUM(fc_week), 0) AS total_forecast,
AVG(ABS(fc_week - act_week)) AS mae,
CASE WHEN SUM(act_week) > 0
THEN SUM(ABS(fc_week - act_week)) / SUM(act_week) ELSE NULL END AS wmape,
AVG(fc_week - act_week) AS bias,
SQRT(AVG(POWER(fc_week - act_week, 2))) AS rmse,
CASE WHEN SUM(act_week) FILTER (WHERE naive_complete) > 0
THEN SUM(ABS(naive_week - act_week)) FILTER (WHERE naive_complete)
/ SUM(act_week) FILTER (WHERE naive_complete)
ELSE NULL END AS naive_wmape
FROM weekly
WHERE NOT (fc_week = 0 AND act_week = 0)
"""
total_inserted += run_and_insert('overall_weekly', weekly_sql)
conn.commit()
@@ -1562,6 +1868,10 @@ def main():
conn, curves_df, dow_indices, monthly_indices, accuracy_margins
)
# Phase 4b: Snapshot sampled future-lead forecasts (7/14/30/60/89d) from
# the fresh run so long-lead accuracy populates once those dates pass (F7).
archive_future_leads(conn, run_id)
duration = time.time() - start_time
# Record run completion (include DOW indices in metadata)
+46 -11
View File
@@ -357,6 +357,9 @@ router.get('/forecast/metrics', async (req, res) => {
const active = parseInt(totals.active_products) || 1;
const curveProducts = parseInt(totals.curve_products) || 0;
// NOTE: despite the name, this is "share of active products forecast via
// lifecycle curves" (curve coverage), NOT a statistical confidence. It only
// feeds a per-day tooltip field. See FORECAST_FIX_PLAN F9 (point 4).
const confidenceLevel = parseFloat((curveProducts / active).toFixed(2));
// Daily series from actual forecast
@@ -687,14 +690,29 @@ router.get('/forecast/accuracy', async (req, res) => {
const { rows: metrics } = await executeQuery(`
SELECT metric_type, dimension_value, sample_size,
total_actual_units, total_forecast_units,
mae, wmape, bias, rmse
mae, wmape, bias, rmse, naive_wmape, fva
FROM forecast_accuracy
WHERE run_id = $1
ORDER BY metric_type, dimension_value
`, [latestRunId]);
/**
 * Shape an "overall"-style forecast_accuracy row (daily or weekly grain) for
 * the JSON response.
 *
 * Numeric columns are parsed from their row values; `wmape` and `naive_wmape`
 * are converted from fractions to percentages rounded to one decimal, `fva`
 * is rounded to three decimals, and `mae`/`bias`/`rmse` to four. Metrics that
 * are null/undefined on the row stay null; unparsable totals become 0.
 *
 * @param {object|null|undefined} m - a forecast_accuracy row, or a falsy
 *   value when no matching row was found.
 * @returns {object|null} the shaped metrics object, or null when `m` is falsy.
 */
const shapeOverall = (m) => {
  if (!m) return null;

  // Round a nullable numeric value to a fixed number of decimals.
  const toRounded = (value, digits) => (
    value != null ? parseFloat(parseFloat(value).toFixed(digits)) : null
  );
  // Convert a nullable fraction (0.25) to a one-decimal percentage (25.0).
  const toPercent = (value) => (
    value != null ? parseFloat((parseFloat(value) * 100).toFixed(1)) : null
  );

  return {
    // Explicit radix so the count is always read as a decimal integer.
    sampleSize: parseInt(m.sample_size, 10),
    // `|| 0` deliberately maps NaN (missing/unparsable totals) to 0.
    totalActual: parseFloat(m.total_actual_units) || 0,
    totalForecast: parseFloat(m.total_forecast_units) || 0,
    mae: toRounded(m.mae, 4),
    wmape: toPercent(m.wmape),
    bias: toRounded(m.bias, 4),
    rmse: toRounded(m.rmse, 4),
    naiveWmape: toPercent(m.naive_wmape),
    fva: toRounded(m.fva, 3),
  };
};
// Organize into response structure
const overall = metrics.find(m => m.metric_type === 'overall');
const overall = metrics.find(m => m.metric_type === 'overall' && m.dimension_value === 'all')
const overallInclDormant = metrics.find(m => m.metric_type === 'overall' && m.dimension_value === 'all_incl_dormant')
const overallWeekly = metrics.find(m => m.metric_type === 'overall_weekly');
const byPhase = metrics
.filter(m => m.metric_type === 'by_phase')
.map(m => ({
@@ -706,6 +724,8 @@ router.get('/forecast/accuracy', async (req, res) => {
wmape: m.wmape != null ? parseFloat((parseFloat(m.wmape) * 100).toFixed(1)) : null,
bias: m.bias != null ? parseFloat(parseFloat(m.bias).toFixed(4)) : null,
rmse: m.rmse != null ? parseFloat(parseFloat(m.rmse).toFixed(4)) : null,
naiveWmape: m.naive_wmape != null ? parseFloat((parseFloat(m.naive_wmape) * 100).toFixed(1)) : null,
fva: m.fva != null ? parseFloat(parseFloat(m.fva).toFixed(3)) : null,
}))
.sort((a, b) => (b.totalActual || 0) - (a.totalActual || 0));
@@ -763,6 +783,26 @@ router.get('/forecast/accuracy', async (req, res) => {
sampleSize: parseInt(r.sample_size),
}));
// Weekly-grain trend across runs (starts empty for old runs that predate
// the overall_weekly metric — that's expected, no backfill). F9.
const { rows: weeklyTrendRows } = await executeQuery(`
SELECT fr.finished_at::date AS run_date,
fa.wmape, fa.naive_wmape, fa.fva, fa.sample_size
FROM forecast_accuracy fa
JOIN forecast_runs fr ON fr.id = fa.run_id
WHERE fa.metric_type = 'overall_weekly'
AND fa.dimension_value = 'all'
ORDER BY fr.finished_at
`);
const accuracyTrendWeekly = weeklyTrendRows.map(r => ({
date: r.run_date instanceof Date ? r.run_date.toISOString().split('T')[0] : r.run_date,
wmape: r.wmape != null ? parseFloat((parseFloat(r.wmape) * 100).toFixed(1)) : null,
naiveWmape: r.naive_wmape != null ? parseFloat((parseFloat(r.naive_wmape) * 100).toFixed(1)) : null,
fva: r.fva != null ? parseFloat(parseFloat(r.fva).toFixed(3)) : null,
sampleSize: parseInt(r.sample_size),
}));
res.json({
hasData: true,
computedAt,
@@ -775,20 +815,15 @@ router.get('/forecast/accuracy', async (req, res) => {
? historyInfo.latest_date.toISOString().split('T')[0]
: historyInfo.latest_date,
},
overall: overall ? {
sampleSize: parseInt(overall.sample_size),
totalActual: parseFloat(overall.total_actual_units) || 0,
totalForecast: parseFloat(overall.total_forecast_units) || 0,
mae: overall.mae != null ? parseFloat(parseFloat(overall.mae).toFixed(4)) : null,
wmape: overall.wmape != null ? parseFloat((parseFloat(overall.wmape) * 100).toFixed(1)) : null,
bias: overall.bias != null ? parseFloat(parseFloat(overall.bias).toFixed(4)) : null,
rmse: overall.rmse != null ? parseFloat(parseFloat(overall.rmse).toFixed(4)) : null,
} : null,
overall: shapeOverall(overall),
overallInclDormant: shapeOverall(overallInclDormant),
overallWeekly: shapeOverall(overallWeekly),
byPhase,
byLeadTime,
byMethod,
dailyTrend,
accuracyTrend,
accuracyTrendWeekly,
});
} catch (err) {
console.error('Error fetching forecast accuracy:', err);