Forecast improvements
This commit is contained in:
@@ -634,6 +634,52 @@ def forecast_from_curve(curve_params, scale_factor, age_days, horizon_days):
|
||||
return np.array(forecasts)
|
||||
|
||||
|
||||
def forecast_preorder(curve_params, scale_factor, days_until_arrival,
                      preorder_daily_rate, horizon_days):
    """
    Piecewise pre-order forecast: a flat observed pre-order trickle until the
    product is expected to arrive, then the scaled launch curve from age 0.

    The launch curve was fit on POST-receipt order history, so running it from
    today (while the product is still weeks from arriving) front-loads full
    first-week launch volume that hasn't happened yet — the main driver of the
    ~2.15x preorder over-forecast. Instead we forecast the slow pre-order rate
    up to the arrival date, then start the curve's day 0 on that date.
    See FORECAST_FIX_PLAN F4.

    Args:
        curve_params: (amplitude, decay_rate, baseline, ...) weekly curve
        scale_factor: per-product multiplier for the post-arrival curve envelope
        days_until_arrival: calendar days from today until expected arrival;
            clamped into [0, horizon_days]
        preorder_daily_rate: observed pre-order units/day (trickle); a missing
            (None) or NaN rate is treated as 0
        horizon_days: forecast horizon length

    Returns:
        array of daily forecast values of length horizon_days
    """
    # Only amplitude and baseline are needed for the pre-arrival cap; the
    # decay rate is not used in this function.
    amplitude, _, baseline = curve_params[:3]
    forecasts = np.zeros(horizon_days)

    # Clamp the arrival offset into the horizon
    dua = int(max(0, min(days_until_arrival, horizon_days)))

    # Pre-arrival segment: flat pre-order trickle, capped at the curve's scaled
    # week-0 daily value (a pre-order day shouldn't out-sell the launch peak).
    if dua > 0:
        # Make the "no usable rate" case explicit instead of relying on how
        # min()/max() happen to order NaN comparisons.
        pre_rate = 0.0 if preorder_daily_rate is None else float(preorder_daily_rate)
        if np.isnan(pre_rate):
            pre_rate = 0.0

        week0_daily = (amplitude / 7.0) * scale_factor + (baseline / 7.0)
        # The cap is only applied when the curve's week-0 value is positive;
        # a non-positive value leaves the observed trickle uncapped.
        if week0_daily > 0:
            pre_rate = min(pre_rate, week0_daily)

        # Never forecast negative units.
        forecasts[:dua] = max(0.0, pre_rate)

    # Post-arrival segment: scaled launch curve, curve day 0 = arrival date.
    if dua < horizon_days:
        forecasts[dua:] = forecast_from_curve(
            curve_params, scale_factor, 0, horizon_days - dua)

    return forecasts
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Batch data loading (eliminates N+1 per-product queries)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -651,9 +697,11 @@ def batch_load_product_data(conn, products):
|
||||
data = {
|
||||
'preorder_sales': {},
|
||||
'preorder_days': {},
|
||||
'preorder_arrival_days': {},
|
||||
'launch_sales': {},
|
||||
'decay_velocity': {},
|
||||
'mature_history': {},
|
||||
'dormant_rate': {},
|
||||
}
|
||||
|
||||
# Pre-order sales: orders placed BEFORE first received date
|
||||
@@ -677,6 +725,39 @@ def batch_load_product_data(conn, products):
|
||||
data['preorder_days'][int(row['pid'])] = float(row['preorder_days'])
|
||||
log.info(f"Batch loaded pre-order sales for {len(data['preorder_sales'])}/{len(preorder_pids)} preorder products")
|
||||
|
||||
# Expected arrival per pre-order product, to time the launch curve.
|
||||
# Prefer the soonest FUTURE expected_date on an open PO; if the only open
|
||||
# PO has a past expected_date assume 7 days; if there's no open PO at all
|
||||
# assume 14 days. See FORECAST_FIX_PLAN F4.
|
||||
arrival_sql = """
|
||||
SELECT pid,
|
||||
MIN(expected_date) FILTER (
|
||||
WHERE expected_date IS NOT NULL AND expected_date >= CURRENT_DATE
|
||||
) AS future_arrival
|
||||
FROM purchase_orders
|
||||
WHERE pid = ANY(%s)
|
||||
AND status IN ('created', 'ordered', 'electronically_sent', 'receiving_started')
|
||||
GROUP BY pid
|
||||
"""
|
||||
adf = execute_query(conn, arrival_sql, [preorder_pids])
|
||||
today = date.today()
|
||||
for _, row in adf.iterrows():
|
||||
pid = int(row['pid'])
|
||||
fa = row['future_arrival']
|
||||
if pd.notna(fa):
|
||||
fa_date = pd.Timestamp(fa).date()
|
||||
data['preorder_arrival_days'][pid] = max(0, (fa_date - today).days)
|
||||
else:
|
||||
data['preorder_arrival_days'][pid] = 7 # open PO, expected_date already past
|
||||
no_po = 0
|
||||
for pid in preorder_pids:
|
||||
if int(pid) not in data['preorder_arrival_days']:
|
||||
data['preorder_arrival_days'][int(pid)] = 14 # no open PO at all
|
||||
no_po += 1
|
||||
log.info(f"Batch loaded preorder arrival for "
|
||||
f"{len(data['preorder_arrival_days']) - no_po}/{len(preorder_pids)} via open POs, "
|
||||
f"{no_po} defaulted to 14d")
|
||||
|
||||
# Launch sales: first 14 days after first received
|
||||
launch_pids = products[products['phase'] == 'launch']['pid'].tolist()
|
||||
if launch_pids:
|
||||
@@ -694,15 +775,23 @@ def batch_load_product_data(conn, products):
|
||||
data['launch_sales'][int(row['pid'])] = float(row['total_sold'])
|
||||
log.info(f"Batch loaded launch sales for {len(data['launch_sales'])}/{len(launch_pids)} launch products")
|
||||
|
||||
# Decay recent velocity: average daily sales over last 30 days
|
||||
# Decay recent velocity: TRUE calendar-daily average over the last 30 days.
|
||||
# We divide the summed units by calendar days (clipped to the product's age),
|
||||
# NOT by the number of snapshot rows. Snapshots are sparse and mostly land on
|
||||
# sold-days, so AVG(units_sold) averages over sold-days only and inflated the
|
||||
# decay rate ~4x (measured 1.353 vs true 0.332 units/day). See FORECAST_FIX_PLAN F1.
|
||||
decay_pids = products[products['phase'] == 'decay']['pid'].tolist()
|
||||
if decay_pids:
|
||||
sql = """
|
||||
SELECT dps.pid, AVG(COALESCE(dps.units_sold, 0)) AS avg_daily
|
||||
SELECT dps.pid,
|
||||
SUM(COALESCE(dps.units_sold, 0))::float
|
||||
/ GREATEST(LEAST(30, (CURRENT_DATE - pm.date_first_received::date)), 1) AS avg_daily
|
||||
FROM daily_product_snapshots dps
|
||||
JOIN product_metrics pm ON pm.pid = dps.pid
|
||||
WHERE dps.pid = ANY(%s)
|
||||
AND dps.snapshot_date >= CURRENT_DATE - INTERVAL '30 days'
|
||||
GROUP BY dps.pid
|
||||
AND dps.snapshot_date >= pm.date_first_received::date
|
||||
GROUP BY dps.pid, pm.date_first_received
|
||||
"""
|
||||
df = execute_query(conn, sql, [decay_pids])
|
||||
for _, row in df.iterrows():
|
||||
@@ -724,6 +813,25 @@ def batch_load_product_data(conn, products):
|
||||
data['mature_history'][int(pid)] = group.copy()
|
||||
log.info(f"Batch loaded history for {len(data['mature_history'])}/{len(mature_pids)} mature products")
|
||||
|
||||
# Dormant trailing order rate: dormant products forecast 0 by default, but
|
||||
# ~11K of them still sell (restocks, promos, long-tail) — ~11% of all demand
|
||||
# currently forecast as a hard zero. Load a trailing-180-day daily order rate
|
||||
# so the dormant branch can carry a small positive rate. See FORECAST_FIX_PLAN F5.
|
||||
dormant_pids = products[products['phase'] == 'dormant']['pid'].tolist()
|
||||
if dormant_pids:
|
||||
sql = """
|
||||
SELECT o.pid, SUM(o.quantity) / 180.0 AS rate
|
||||
FROM orders o
|
||||
WHERE o.pid = ANY(%s)
|
||||
AND o.canceled IS DISTINCT FROM TRUE
|
||||
AND o.date >= CURRENT_DATE - INTERVAL '180 days'
|
||||
GROUP BY o.pid
|
||||
"""
|
||||
df = execute_query(conn, sql, [dormant_pids])
|
||||
for _, row in df.iterrows():
|
||||
data['dormant_rate'][int(row['pid'])] = float(row['rate'])
|
||||
log.info(f"Batch loaded dormant order rate for {len(data['dormant_rate'])}/{len(dormant_pids)} dormant products")
|
||||
|
||||
return data
|
||||
|
||||
|
||||
@@ -829,11 +937,20 @@ def forecast_mature(product, history_df):
|
||||
# Not enough data — flat velocity
|
||||
return np.full(FORECAST_HORIZON_DAYS, velocity)
|
||||
|
||||
# Fill date gaps with 0 sales (days where product had no snapshot = no sales)
|
||||
# Reindex over the FULL calendar window ending yesterday, not just the span
|
||||
# between the first and last snapshot. resample() only covers first→last
|
||||
# snapshot, so leading/trailing quiet periods are absent and the Holt level
|
||||
# is fitted only on the product's busy span (can run ~4x too high). An
|
||||
# explicit reindex fills every quiet calendar day with 0. (pid, snapshot_date)
|
||||
# is unique so there is no duplicate-index risk; do NOT use combine_first
|
||||
# (it keeps zeros over real data). See FORECAST_FIX_PLAN F2.
|
||||
hist = history_df.copy()
|
||||
hist['snapshot_date'] = pd.to_datetime(hist['snapshot_date'])
|
||||
hist = hist.set_index('snapshot_date').resample('D').sum().fillna(0)
|
||||
series = hist['units_sold'].values.astype(float)
|
||||
hist = hist.set_index('snapshot_date')['units_sold']
|
||||
full_index = pd.date_range(
|
||||
end=pd.Timestamp(date.today() - timedelta(days=1)),
|
||||
periods=EXP_SMOOTHING_WINDOW, freq='D')
|
||||
series = hist.reindex(full_index, fill_value=0.0).values.astype(float)
|
||||
|
||||
# Need at least 2 non-zero values for smoothing
|
||||
if np.count_nonzero(series) < 2:
|
||||
@@ -956,9 +1073,24 @@ def generate_all_forecasts(conn, curves_df, dow_indices, monthly_indices=None,
|
||||
today = date.today()
|
||||
forecast_dates = [today + timedelta(days=i) for i in range(FORECAST_HORIZON_DAYS)]
|
||||
|
||||
# Pre-compute DOW and seasonal multipliers for each forecast date
|
||||
# Pre-compute DOW and seasonal multipliers for each forecast date.
|
||||
# DOW multipliers stay ABSOLUTE — every calibration is a multi-week average
|
||||
# and therefore DOW-neutral, so reshaping by absolute DOW indices is correct.
|
||||
# Seasonal indices must be applied RELATIVE to the calibration period:
|
||||
# each per-product calibration (decay velocity, mature Holt level, launch /
|
||||
# preorder scale) is fitted on raw recent actuals that already embed the
|
||||
# current month's seasonal level. Multiplying by the absolute target-month
|
||||
# index double-counts seasonality (~25% over-forecast at the May→June sale
|
||||
# transition, worse near November). Divide by the trailing-30-day average
|
||||
# index so only the seasonal *change* from calibration to target applies.
|
||||
# See FORECAST_FIX_PLAN F3.
|
||||
dow_multipliers = [dow_indices.get(d.isoweekday(), 1.0) for d in forecast_dates]
|
||||
seasonal_multipliers = [monthly_indices.get(d.month, 1.0) for d in forecast_dates]
|
||||
trailing = [today - timedelta(days=i) for i in range(1, 31)]
|
||||
calibration_index = float(np.mean([monthly_indices.get(d.month, 1.0) for d in trailing]))
|
||||
seasonal_multipliers = [
|
||||
monthly_indices.get(d.month, 1.0) / max(calibration_index, 0.1)
|
||||
for d in forecast_dates
|
||||
]
|
||||
|
||||
# TRUNCATE before streaming writes
|
||||
with conn.cursor() as cur:
|
||||
@@ -1002,9 +1134,33 @@ def generate_all_forecasts(conn, curves_df, dow_indices, monthly_indices=None,
|
||||
try:
|
||||
curve_info = get_curve_for_product(product, curves_df)
|
||||
|
||||
if phase in ('preorder', 'launch'):
|
||||
if phase == 'preorder':
|
||||
if curve_info:
|
||||
scale = compute_scale_factor(phase, product, curve_info, batch_data)
|
||||
scale = compute_scale_factor('preorder', product, curve_info, batch_data)
|
||||
# Time the launch curve to expected arrival instead of
|
||||
# running it from today (F4). Pre-arrival days carry the
|
||||
# observed pre-order trickle rate.
|
||||
days_until_arrival = batch_data['preorder_arrival_days'].get(pid, 14)
|
||||
preorder_units = batch_data['preorder_sales'].get(pid, 0)
|
||||
preorder_days = batch_data['preorder_days'].get(pid, 1)
|
||||
preorder_daily_rate = preorder_units / max(preorder_days, 1)
|
||||
forecasts = forecast_preorder(
|
||||
curve_info, scale, days_until_arrival,
|
||||
preorder_daily_rate, FORECAST_HORIZON_DAYS)
|
||||
method = 'lifecycle_curve'
|
||||
else:
|
||||
# No reliable curve — fall back to velocity if available
|
||||
velocity = product.get('sales_velocity_daily') or 0
|
||||
if velocity > 0:
|
||||
forecasts = np.full(FORECAST_HORIZON_DAYS, velocity)
|
||||
method = 'velocity'
|
||||
else:
|
||||
forecasts = forecast_dormant()
|
||||
method = 'zero'
|
||||
|
||||
elif phase == 'launch':
|
||||
if curve_info:
|
||||
scale = compute_scale_factor('launch', product, curve_info, batch_data)
|
||||
forecasts = forecast_from_curve(curve_info, scale, age, FORECAST_HORIZON_DAYS)
|
||||
method = 'lifecycle_curve'
|
||||
else:
|
||||
@@ -1038,8 +1194,16 @@ def generate_all_forecasts(conn, curves_df, dow_indices, monthly_indices=None,
|
||||
method = 'velocity'
|
||||
|
||||
else: # dormant
|
||||
forecasts = forecast_dormant()
|
||||
method = 'zero'
|
||||
# Carry a small positive rate for dormant products that still
|
||||
# trickle sales (restocks/promos/long-tail); only truly dead
|
||||
# products stay at zero. See FORECAST_FIX_PLAN F5.
|
||||
rate = batch_data['dormant_rate'].get(pid, 0)
|
||||
if rate > 0:
|
||||
forecasts = np.full(FORECAST_HORIZON_DAYS, rate)
|
||||
method = 'velocity'
|
||||
else:
|
||||
forecasts = forecast_dormant()
|
||||
method = 'zero'
|
||||
|
||||
# Confidence interval: use accuracy-calibrated margins per phase
|
||||
base_margin = accuracy_margins.get(phase, 0.5)
|
||||
@@ -1108,6 +1272,8 @@ def archive_forecasts(conn, run_id):
|
||||
""")
|
||||
cur.execute("CREATE INDEX IF NOT EXISTS idx_pfh_date ON product_forecasts_history(forecast_date)")
|
||||
cur.execute("CREATE INDEX IF NOT EXISTS idx_pfh_pid_date ON product_forecasts_history(pid, forecast_date)")
|
||||
# Naive-baseline column for forecast value-added (FVA). See FORECAST_FIX_PLAN F8.
|
||||
cur.execute("ALTER TABLE product_forecasts_history ADD COLUMN IF NOT EXISTS naive_units NUMERIC(10,2)")
|
||||
|
||||
# Find the previous completed run (whose forecasts are still in product_forecasts)
|
||||
cur.execute("""
|
||||
@@ -1124,15 +1290,27 @@ def archive_forecasts(conn, run_id):
|
||||
|
||||
prev_run_id = prev_run[0]
|
||||
|
||||
# Archive only past-date forecasts (where actuals now exist)
|
||||
# Archive only past-date forecasts (where actuals now exist). Attach the
|
||||
# naive baseline (flat trailing-28-day daily average) at the same time so
|
||||
# forecast value-added can be measured. See FORECAST_FIX_PLAN F8.
|
||||
cur.execute("""
|
||||
INSERT INTO product_forecasts_history
|
||||
(run_id, pid, forecast_date, forecast_units, forecast_revenue,
|
||||
lifecycle_phase, forecast_method, confidence_lower, confidence_upper, generated_at)
|
||||
SELECT %s, pid, forecast_date, forecast_units, forecast_revenue,
|
||||
lifecycle_phase, forecast_method, confidence_lower, confidence_upper, generated_at
|
||||
FROM product_forecasts
|
||||
WHERE forecast_date < CURRENT_DATE
|
||||
lifecycle_phase, forecast_method, confidence_lower, confidence_upper,
|
||||
generated_at, naive_units)
|
||||
SELECT %s, pf.pid, pf.forecast_date, pf.forecast_units, pf.forecast_revenue,
|
||||
pf.lifecycle_phase, pf.forecast_method, pf.confidence_lower, pf.confidence_upper,
|
||||
pf.generated_at, COALESCE(nv.naive_daily, 0)
|
||||
FROM product_forecasts pf
|
||||
LEFT JOIN (
|
||||
SELECT o.pid, SUM(o.quantity) / 28.0 AS naive_daily
|
||||
FROM orders o
|
||||
WHERE o.canceled IS DISTINCT FROM TRUE
|
||||
AND o.date >= CURRENT_DATE - INTERVAL '28 days'
|
||||
AND o.date < CURRENT_DATE
|
||||
GROUP BY o.pid
|
||||
) nv ON nv.pid = pf.pid
|
||||
WHERE pf.forecast_date < CURRENT_DATE
|
||||
ON CONFLICT (run_id, pid, forecast_date) DO NOTHING
|
||||
""", (prev_run_id,))
|
||||
|
||||
@@ -1154,6 +1332,48 @@ def archive_forecasts(conn, run_id):
|
||||
return archived
|
||||
|
||||
|
||||
def archive_future_leads(conn, run_id):
    """
    Snapshot a sample of FUTURE-dated forecasts from the freshly generated
    product_forecasts into product_forecasts_history under the current run.

    archive_forecasts() only captures the 1-day slice that just elapsed, so
    every accuracy sample falls in the '1-7d' lead bucket and the
    15/30/60/90-day forecasts that purchasing relies on are never validated.
    This copies the rows that are exactly 7, 14, 30, 60 and 89 days ahead
    (non-dormant phases only) so compute_accuracy() can score them in their
    own lead bucket once each date has passed. The naive baseline (trailing
    28-day order quantity / 28) is attached the same way as in the past-date
    path. Future-dated rows survive the 90-day prune until their own date
    passes. See FORECAST_FIX_PLAN F7.

    Args:
        conn: open database connection; committed after the insert
        run_id: id of the current forecast run the rows are attributed to

    Returns:
        the cursor's rowcount for the INSERT statement
    """
    # Rows already present for (run_id, pid, forecast_date) are left untouched
    # by the ON CONFLICT clause.
    insert_sql = """
        INSERT INTO product_forecasts_history
            (run_id, pid, forecast_date, forecast_units, forecast_revenue,
             lifecycle_phase, forecast_method, confidence_lower, confidence_upper,
             generated_at, naive_units)
        SELECT %s, pf.pid, pf.forecast_date, pf.forecast_units, pf.forecast_revenue,
               pf.lifecycle_phase, pf.forecast_method, pf.confidence_lower, pf.confidence_upper,
               pf.generated_at, COALESCE(nv.naive_daily, 0)
        FROM product_forecasts pf
        LEFT JOIN (
            SELECT o.pid, SUM(o.quantity) / 28.0 AS naive_daily
            FROM orders o
            WHERE o.canceled IS DISTINCT FROM TRUE
              AND o.date >= CURRENT_DATE - INTERVAL '28 days'
              AND o.date < CURRENT_DATE
            GROUP BY o.pid
        ) nv ON nv.pid = pf.pid
        WHERE pf.lifecycle_phase != 'dormant'
          AND pf.forecast_date - CURRENT_DATE IN (7, 14, 30, 60, 89)
        ON CONFLICT (run_id, pid, forecast_date) DO NOTHING
    """
    params = (run_id,)

    with conn.cursor() as cur:
        cur.execute(insert_sql, params)
        # Read the count while the cursor is still open.
        archived = cur.rowcount

    conn.commit()
    log.info(f"Archived {archived} future-lead forecast rows (7/14/30/60/89d) for run {run_id}")
    return archived
|
||||
|
||||
|
||||
def compute_accuracy(conn, run_id):
|
||||
"""
|
||||
Compute forecast accuracy metrics from archived history vs. actual sales.
|
||||
@@ -1162,11 +1382,18 @@ def compute_accuracy(conn, run_id):
|
||||
(pid, forecast_date = snapshot_date) to compare forecasted vs. actual units.
|
||||
|
||||
Stores results in forecast_accuracy table, broken down by:
|
||||
- overall: single aggregate row
|
||||
- overall: two rows — 'all' (non-dormant) and 'all_incl_dormant' (F5)
|
||||
- overall_weekly: per-product weekly-grain WMAPE — the informative headline
|
||||
for intermittent demand (daily grain has a ~190% floor) (F9)
|
||||
- by_phase: per lifecycle phase
|
||||
- by_lead_time: bucketed by how far ahead the forecast was
|
||||
- by_lead_time: bucketed by how far ahead the forecast was — long-lead
|
||||
buckets populate as the future-lead archives mature (F7)
|
||||
- by_method: per forecast method
|
||||
- daily: per forecast_date (for trend charts)
|
||||
|
||||
Every dimension also stores naive_wmape (flat trailing-28d baseline) and
|
||||
fva = 1 - wmape/naive_wmape, so the engine can be judged as value-over-naive
|
||||
(F8). Only realized dates (forecast_date < CURRENT_DATE) are scored.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
# Ensure accuracy table exists
|
||||
@@ -1186,6 +1413,10 @@ def compute_accuracy(conn, run_id):
|
||||
PRIMARY KEY (run_id, metric_type, dimension_value)
|
||||
)
|
||||
""")
|
||||
# Naive-baseline WMAPE and forecast value-added (FVA = 1 - wmape/naive_wmape).
|
||||
# See FORECAST_FIX_PLAN F8.
|
||||
cur.execute("ALTER TABLE forecast_accuracy ADD COLUMN IF NOT EXISTS naive_wmape NUMERIC(10,4)")
|
||||
cur.execute("ALTER TABLE forecast_accuracy ADD COLUMN IF NOT EXISTS fva NUMERIC(10,4)")
|
||||
conn.commit()
|
||||
|
||||
# Check if we have any history to analyze
|
||||
@@ -1195,124 +1426,199 @@ def compute_accuracy(conn, run_id):
|
||||
log.info("No forecast history available for accuracy computation")
|
||||
return
|
||||
|
||||
# For each (pid, forecast_date) pair, keep only the most recent run's
|
||||
# forecast row. This prevents double-counting when multiple runs have
|
||||
# archived forecasts for the same product×date combination.
|
||||
accuracy_cte = """
|
||||
WITH ranked_history AS (
|
||||
# Base CTEs (FORECAST_FIX_PLAN F7):
|
||||
# - Only score realized dates (forecast_date < CURRENT_DATE); future-lead
|
||||
# archives are excluded until their date passes.
|
||||
# - short_lead*: lead 0-6 deduped per (pid, forecast_date) — preserves the
|
||||
# meaning of the existing headline metrics. short_lead_eval keeps the
|
||||
# raw snapshot grid (incl. zero-zero days) for complete-week detection;
|
||||
# `accuracy` drops zero-zero days for daily-grain metrics.
|
||||
# - lead_dedup/lead_accuracy: deduped per (pid, forecast_date, lead_bucket)
|
||||
# so each long-lead bucket gets its own sample (the by_lead_time table).
|
||||
base_cte = """
|
||||
WITH ranked_all AS (
|
||||
SELECT
|
||||
pfh.*,
|
||||
pfh.pid, pfh.forecast_date, pfh.forecast_units, pfh.naive_units,
|
||||
pfh.lifecycle_phase, pfh.forecast_method,
|
||||
fr.started_at,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY pfh.pid, pfh.forecast_date
|
||||
ORDER BY fr.started_at DESC
|
||||
) AS rn
|
||||
(pfh.forecast_date - fr.started_at::date) AS lead_days,
|
||||
CASE
|
||||
WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 0 AND 6 THEN '1-7d'
|
||||
WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 7 AND 13 THEN '8-14d'
|
||||
WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 14 AND 29 THEN '15-30d'
|
||||
WHEN (pfh.forecast_date - fr.started_at::date) BETWEEN 30 AND 59 THEN '31-60d'
|
||||
ELSE '61-90d'
|
||||
END AS lead_bucket
|
||||
FROM product_forecasts_history pfh
|
||||
JOIN forecast_runs fr ON fr.id = pfh.run_id
|
||||
WHERE pfh.forecast_date < CURRENT_DATE
|
||||
),
|
||||
short_lead AS (
|
||||
SELECT *,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY pid, forecast_date ORDER BY started_at DESC
|
||||
) AS rn
|
||||
FROM ranked_all
|
||||
WHERE lead_days BETWEEN 0 AND 6
|
||||
),
|
||||
short_lead_eval AS (
|
||||
SELECT sl.pid, sl.lifecycle_phase, sl.forecast_method, sl.forecast_date,
|
||||
sl.forecast_units, sl.naive_units,
|
||||
COALESCE(dps.units_sold, 0) AS actual_units,
|
||||
(sl.forecast_units - COALESCE(dps.units_sold, 0)) AS error,
|
||||
ABS(sl.forecast_units - COALESCE(dps.units_sold, 0)) AS abs_error
|
||||
FROM short_lead sl
|
||||
LEFT JOIN daily_product_snapshots dps
|
||||
ON dps.pid = sl.pid AND dps.snapshot_date = sl.forecast_date
|
||||
WHERE sl.rn = 1
|
||||
),
|
||||
accuracy AS (
|
||||
SELECT
|
||||
rh.lifecycle_phase,
|
||||
rh.forecast_method,
|
||||
rh.forecast_date,
|
||||
(rh.forecast_date - rh.started_at::date) AS lead_days,
|
||||
rh.forecast_units,
|
||||
SELECT * FROM short_lead_eval
|
||||
WHERE NOT (forecast_units = 0 AND actual_units = 0)
|
||||
),
|
||||
lead_dedup AS (
|
||||
SELECT *,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY pid, forecast_date, lead_bucket ORDER BY started_at DESC
|
||||
) AS rn
|
||||
FROM ranked_all
|
||||
),
|
||||
lead_accuracy AS (
|
||||
SELECT ld.lead_bucket, ld.forecast_units, ld.naive_units,
|
||||
COALESCE(dps.units_sold, 0) AS actual_units,
|
||||
(rh.forecast_units - COALESCE(dps.units_sold, 0)) AS error,
|
||||
ABS(rh.forecast_units - COALESCE(dps.units_sold, 0)) AS abs_error
|
||||
FROM ranked_history rh
|
||||
(ld.forecast_units - COALESCE(dps.units_sold, 0)) AS error,
|
||||
ABS(ld.forecast_units - COALESCE(dps.units_sold, 0)) AS abs_error
|
||||
FROM lead_dedup ld
|
||||
LEFT JOIN daily_product_snapshots dps
|
||||
ON dps.pid = rh.pid AND dps.snapshot_date = rh.forecast_date
|
||||
WHERE rh.rn = 1
|
||||
AND NOT (rh.forecast_units = 0 AND COALESCE(dps.units_sold, 0) = 0)
|
||||
ON dps.pid = ld.pid AND dps.snapshot_date = ld.forecast_date
|
||||
WHERE ld.rn = 1
|
||||
AND ld.lifecycle_phase != 'dormant'
|
||||
AND NOT (ld.forecast_units = 0 AND COALESCE(dps.units_sold, 0) = 0)
|
||||
)
|
||||
"""
|
||||
|
||||
# Compute and insert metrics for each dimension
|
||||
dimensions = {
|
||||
'overall': "SELECT 'all' AS dim",
|
||||
'by_phase': "SELECT DISTINCT lifecycle_phase AS dim FROM accuracy",
|
||||
'by_lead_time': """
|
||||
SELECT DISTINCT
|
||||
CASE
|
||||
WHEN lead_days BETWEEN 0 AND 6 THEN '1-7d'
|
||||
WHEN lead_days BETWEEN 7 AND 13 THEN '8-14d'
|
||||
WHEN lead_days BETWEEN 14 AND 29 THEN '15-30d'
|
||||
WHEN lead_days BETWEEN 30 AND 59 THEN '31-60d'
|
||||
ELSE '61-90d'
|
||||
END AS dim
|
||||
FROM accuracy
|
||||
""",
|
||||
'by_method': "SELECT DISTINCT forecast_method AS dim FROM accuracy",
|
||||
'daily': "SELECT DISTINCT forecast_date::text AS dim FROM accuracy",
|
||||
}
|
||||
|
||||
filter_clauses = {
|
||||
'overall': "lifecycle_phase != 'dormant'",
|
||||
'by_phase': "lifecycle_phase = dims.dim",
|
||||
'by_lead_time': """
|
||||
CASE
|
||||
WHEN lead_days BETWEEN 0 AND 6 THEN '1-7d'
|
||||
WHEN lead_days BETWEEN 7 AND 13 THEN '8-14d'
|
||||
WHEN lead_days BETWEEN 14 AND 29 THEN '15-30d'
|
||||
WHEN lead_days BETWEEN 30 AND 59 THEN '31-60d'
|
||||
ELSE '61-90d'
|
||||
END = dims.dim
|
||||
""",
|
||||
'by_method': "forecast_method = dims.dim",
|
||||
'daily': "forecast_date::text = dims.dim",
|
||||
}
|
||||
|
||||
total_inserted = 0
|
||||
|
||||
for metric_type, dim_query in dimensions.items():
|
||||
filter_clause = filter_clauses[metric_type]
|
||||
|
||||
sql = f"""
|
||||
{accuracy_cte},
|
||||
dims AS ({dim_query})
|
||||
# Daily-grain aggregate over a source CTE aliased `a`, computing the
|
||||
# engine WMAPE plus the naive-baseline WMAPE (NULL-safe: rows archived
|
||||
# before F8 have naive_units NULL and are excluded from the naive sums).
|
||||
def daily_agg(dim_expr, source, where=None, group_by=None):
|
||||
where_sql = f"WHERE {where}" if where else ""
|
||||
group_sql = f"GROUP BY {group_by}" if group_by else ""
|
||||
return f"""
|
||||
SELECT
|
||||
dims.dim,
|
||||
{dim_expr} AS dim,
|
||||
COUNT(*) AS sample_size,
|
||||
COALESCE(SUM(a.actual_units), 0) AS total_actual,
|
||||
COALESCE(SUM(a.forecast_units), 0) AS total_forecast,
|
||||
AVG(a.abs_error) AS mae,
|
||||
CASE WHEN SUM(a.actual_units) > 0
|
||||
THEN SUM(a.abs_error) / SUM(a.actual_units)
|
||||
ELSE NULL END AS wmape,
|
||||
THEN SUM(a.abs_error) / SUM(a.actual_units) ELSE NULL END AS wmape,
|
||||
AVG(a.error) AS bias,
|
||||
SQRT(AVG(POWER(a.error, 2))) AS rmse
|
||||
FROM dims
|
||||
CROSS JOIN accuracy a
|
||||
WHERE {filter_clause}
|
||||
GROUP BY dims.dim
|
||||
SQRT(AVG(POWER(a.error, 2))) AS rmse,
|
||||
CASE WHEN SUM(a.actual_units) FILTER (WHERE a.naive_units IS NOT NULL) > 0
|
||||
THEN SUM(ABS(a.naive_units - a.actual_units)) FILTER (WHERE a.naive_units IS NOT NULL)
|
||||
/ SUM(a.actual_units) FILTER (WHERE a.naive_units IS NOT NULL)
|
||||
ELSE NULL END AS naive_wmape
|
||||
FROM {source} a
|
||||
{where_sql}
|
||||
{group_sql}
|
||||
"""
|
||||
|
||||
cur.execute(sql)
|
||||
rows = cur.fetchall()
|
||||
insert_sql = """
|
||||
INSERT INTO forecast_accuracy
|
||||
(run_id, metric_type, dimension_value, sample_size,
|
||||
total_actual_units, total_forecast_units, mae, wmape, bias, rmse,
|
||||
naive_wmape, fva)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||||
ON CONFLICT (run_id, metric_type, dimension_value)
|
||||
DO UPDATE SET
|
||||
sample_size = EXCLUDED.sample_size,
|
||||
total_actual_units = EXCLUDED.total_actual_units,
|
||||
total_forecast_units = EXCLUDED.total_forecast_units,
|
||||
mae = EXCLUDED.mae, wmape = EXCLUDED.wmape,
|
||||
bias = EXCLUDED.bias, rmse = EXCLUDED.rmse,
|
||||
naive_wmape = EXCLUDED.naive_wmape, fva = EXCLUDED.fva,
|
||||
computed_at = NOW()
|
||||
"""
|
||||
|
||||
for row in rows:
|
||||
dim_val, sample_size, total_actual, total_forecast, mae, wmape, bias, rmse = row
|
||||
cur.execute("""
|
||||
INSERT INTO forecast_accuracy
|
||||
(run_id, metric_type, dimension_value, sample_size,
|
||||
total_actual_units, total_forecast_units, mae, wmape, bias, rmse)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||||
ON CONFLICT (run_id, metric_type, dimension_value)
|
||||
DO UPDATE SET
|
||||
sample_size = EXCLUDED.sample_size,
|
||||
total_actual_units = EXCLUDED.total_actual_units,
|
||||
total_forecast_units = EXCLUDED.total_forecast_units,
|
||||
mae = EXCLUDED.mae, wmape = EXCLUDED.wmape,
|
||||
bias = EXCLUDED.bias, rmse = EXCLUDED.rmse,
|
||||
computed_at = NOW()
|
||||
""", (run_id, metric_type, dim_val, sample_size,
|
||||
float(total_actual), float(total_forecast),
|
||||
float(mae) if mae is not None else None,
|
||||
float(wmape) if wmape is not None else None,
|
||||
float(bias) if bias is not None else None,
|
||||
float(rmse) if rmse is not None else None))
|
||||
total_inserted += 1
|
||||
def _f(x):
|
||||
return float(x) if x is not None else None
|
||||
|
||||
def run_and_insert(metric_type, sql):
|
||||
cur.execute(base_cte + sql)
|
||||
n = 0
|
||||
for row in cur.fetchall():
|
||||
(dim_val, sample_size, total_actual, total_forecast,
|
||||
mae, wmape, bias, rmse, naive_wmape) = row
|
||||
fva = None
|
||||
if wmape is not None and naive_wmape is not None and float(naive_wmape) > 0:
|
||||
fva = 1.0 - float(wmape) / float(naive_wmape)
|
||||
cur.execute(insert_sql, (
|
||||
run_id, metric_type, dim_val, sample_size,
|
||||
_f(total_actual), _f(total_forecast), _f(mae), _f(wmape),
|
||||
_f(bias), _f(rmse), _f(naive_wmape), _f(fva)))
|
||||
n += 1
|
||||
return n
|
||||
|
||||
total_inserted = 0
|
||||
|
||||
# overall: two rows — 'all' (non-dormant, the headline) and
|
||||
# 'all_incl_dormant' (everything, so the ~11% dormant demand stops being
|
||||
# invisible). Both are short-lead (lead 0-6). F5.
|
||||
overall_source = """(
|
||||
SELECT a.*, 'all'::text AS dim FROM accuracy a WHERE a.lifecycle_phase != 'dormant'
|
||||
UNION ALL
|
||||
SELECT a.*, 'all_incl_dormant'::text AS dim FROM accuracy a
|
||||
)"""
|
||||
total_inserted += run_and_insert('overall',
|
||||
daily_agg('a.dim', overall_source, group_by='a.dim'))
|
||||
|
||||
# by_phase / by_method / daily — short-lead daily-grain over `accuracy`.
|
||||
total_inserted += run_and_insert('by_phase',
|
||||
daily_agg('a.lifecycle_phase', 'accuracy', group_by='a.lifecycle_phase'))
|
||||
total_inserted += run_and_insert('by_method',
|
||||
daily_agg('a.forecast_method', 'accuracy', group_by='a.forecast_method'))
|
||||
total_inserted += run_and_insert('daily',
|
||||
daily_agg('a.forecast_date::text', 'accuracy',
|
||||
where="a.lifecycle_phase != 'dormant'", group_by='a.forecast_date'))
|
||||
|
||||
# by_lead_time — one sample per (pid, date, lead bucket) over `lead_accuracy`.
|
||||
# Buckets beyond '1-7d' populate as the future-lead archives (F7) mature.
|
||||
total_inserted += run_and_insert('by_lead_time',
|
||||
daily_agg('a.lead_bucket', 'lead_accuracy', group_by='a.lead_bucket'))
|
||||
|
||||
# overall_weekly — the informative headline for intermittent retail demand.
|
||||
# Aggregate the short-lead rows to (pid, complete week), then WMAPE over
|
||||
# pid-weeks. Daily-grain WMAPE has a ~190% floor on this catalog; weekly
|
||||
# grain is ~109% and responds to real improvement. F9.
|
||||
weekly_sql = """,
|
||||
weekly AS (
|
||||
SELECT pid, date_trunc('week', forecast_date) AS wk,
|
||||
SUM(forecast_units) AS fc_week,
|
||||
SUM(actual_units) AS act_week,
|
||||
SUM(naive_units) AS naive_week,
|
||||
bool_and(naive_units IS NOT NULL) AS naive_complete
|
||||
FROM short_lead_eval
|
||||
WHERE lifecycle_phase != 'dormant'
|
||||
GROUP BY pid, date_trunc('week', forecast_date)
|
||||
HAVING COUNT(*) = 7
|
||||
)
|
||||
SELECT 'all'::text AS dim,
|
||||
COUNT(*) AS sample_size,
|
||||
COALESCE(SUM(act_week), 0) AS total_actual,
|
||||
COALESCE(SUM(fc_week), 0) AS total_forecast,
|
||||
AVG(ABS(fc_week - act_week)) AS mae,
|
||||
CASE WHEN SUM(act_week) > 0
|
||||
THEN SUM(ABS(fc_week - act_week)) / SUM(act_week) ELSE NULL END AS wmape,
|
||||
AVG(fc_week - act_week) AS bias,
|
||||
SQRT(AVG(POWER(fc_week - act_week, 2))) AS rmse,
|
||||
CASE WHEN SUM(act_week) FILTER (WHERE naive_complete) > 0
|
||||
THEN SUM(ABS(naive_week - act_week)) FILTER (WHERE naive_complete)
|
||||
/ SUM(act_week) FILTER (WHERE naive_complete)
|
||||
ELSE NULL END AS naive_wmape
|
||||
FROM weekly
|
||||
WHERE NOT (fc_week = 0 AND act_week = 0)
|
||||
"""
|
||||
total_inserted += run_and_insert('overall_weekly', weekly_sql)
|
||||
|
||||
conn.commit()
|
||||
|
||||
@@ -1562,6 +1868,10 @@ def main():
|
||||
conn, curves_df, dow_indices, monthly_indices, accuracy_margins
|
||||
)
|
||||
|
||||
# Phase 4b: Snapshot sampled future-lead forecasts (7/14/30/60/89d) from
|
||||
# the fresh run so long-lead accuracy populates once those dates pass (F7).
|
||||
archive_future_leads(conn, run_id)
|
||||
|
||||
duration = time.time() - start_time
|
||||
|
||||
# Record run completion (include DOW indices in metadata)
|
||||
|
||||
Reference in New Issue
Block a user