Add forecasting, lifecycle phases, and associated component and script changes

This commit is contained in:
2026-02-13 22:45:18 -05:00
parent f41b5ab0f6
commit 45ded53530
29 changed files with 3643 additions and 376 deletions

View File

@@ -1,6 +1,7 @@
-- Description: Calculates and updates daily aggregated product data.
-- Self-healing: automatically detects and fills gaps in snapshot history.
-- Always reprocesses recent days to pick up new orders and data corrections.
-- Self-healing: detects gaps (missing snapshots), stale data (snapshot
-- aggregates that don't match source tables after backfills), and always
-- reprocesses recent days to pick up new orders and data corrections.
-- Dependencies: Core import tables (products, orders, purchase_orders), calculate_status table.
-- Frequency: Hourly (Run ~5-10 minutes after hourly data import completes).
@@ -18,28 +19,26 @@ DECLARE
BEGIN
RAISE NOTICE 'Running % script. Start Time: %', _module_name, _start_time;
-- Find the latest existing snapshot date to determine where gaps begin
-- Find the latest existing snapshot date (for logging only)
SELECT MAX(snapshot_date) INTO _latest_snapshot
FROM public.daily_product_snapshots;
-- Determine how far back to look for gaps, capped at _max_backfill_days
_backfill_start := GREATEST(
COALESCE(_latest_snapshot + 1, CURRENT_DATE - _max_backfill_days),
CURRENT_DATE - _max_backfill_days
);
-- Always scan the full backfill window to catch holes in the middle,
-- not just gaps at the end. The gap fill and stale detection queries
-- need to see the entire range to find missing or outdated snapshots.
_backfill_start := CURRENT_DATE - _max_backfill_days;
IF _latest_snapshot IS NULL THEN
RAISE NOTICE 'No existing snapshots found. Backfilling up to % days.', _max_backfill_days;
ELSIF _backfill_start > _latest_snapshot + 1 THEN
RAISE NOTICE 'Latest snapshot: %. Gap exceeds % day cap — backfilling from %. Use rebuild script for full history.',
_latest_snapshot, _max_backfill_days, _backfill_start;
ELSE
RAISE NOTICE 'Latest snapshot: %. Checking for gaps from %.', _latest_snapshot, _backfill_start;
RAISE NOTICE 'Latest snapshot: %. Scanning from % for gaps and stale data.', _latest_snapshot, _backfill_start;
END IF;
-- Process all dates that need snapshots:
-- 1. Gap fill: dates with orders/receivings but no snapshots (older than recent window)
-- 2. Recent recheck: last N days always reprocessed (picks up new orders, corrections)
-- 2. Stale detection: existing snapshots where aggregates don't match source data
-- (catches backfilled imports that arrived after snapshot was calculated)
-- 3. Recent recheck: last N days always reprocessed (picks up new orders, corrections)
FOR _target_date IN
SELECT d FROM (
-- Gap fill: find dates with activity but missing snapshots
@@ -55,6 +54,36 @@ BEGIN
SELECT 1 FROM public.daily_product_snapshots dps WHERE dps.snapshot_date = activity_dates.d
)
UNION
-- Stale detection: compare snapshot aggregates against source tables
SELECT snap_agg.snapshot_date AS d
FROM (
SELECT snapshot_date,
COALESCE(SUM(units_received), 0)::bigint AS snap_received,
COALESCE(SUM(units_sold), 0)::bigint AS snap_sold
FROM public.daily_product_snapshots
WHERE snapshot_date >= _backfill_start
AND snapshot_date < CURRENT_DATE - _recent_recheck_days
GROUP BY snapshot_date
) snap_agg
LEFT JOIN (
SELECT received_date::date AS d, SUM(qty_each)::bigint AS actual_received
FROM public.receivings
WHERE received_date::date >= _backfill_start
AND received_date::date < CURRENT_DATE - _recent_recheck_days
GROUP BY received_date::date
) recv_agg ON snap_agg.snapshot_date = recv_agg.d
LEFT JOIN (
SELECT date::date AS d,
SUM(CASE WHEN quantity > 0 AND COALESCE(status, 'pending') NOT IN ('canceled', 'returned')
THEN quantity ELSE 0 END)::bigint AS actual_sold
FROM public.orders
WHERE date::date >= _backfill_start
AND date::date < CURRENT_DATE - _recent_recheck_days
GROUP BY date::date
) orders_agg ON snap_agg.snapshot_date = orders_agg.d
WHERE snap_agg.snap_received != COALESCE(recv_agg.actual_received, 0)
OR snap_agg.snap_sold != COALESCE(orders_agg.actual_sold, 0)
UNION
-- Recent days: always reprocess
SELECT d::date
FROM generate_series(
@@ -66,11 +95,18 @@ BEGIN
ORDER BY d
LOOP
_days_processed := _days_processed + 1;
RAISE NOTICE 'Processing date: % [%/%]', _target_date, _days_processed,
_days_processed; -- count not known ahead of time, but shows progress
-- Classify why this date is being processed (for logging)
IF _target_date >= CURRENT_DATE - _recent_recheck_days THEN
RAISE NOTICE 'Processing date: % [recent recheck]', _target_date;
ELSIF NOT EXISTS (SELECT 1 FROM public.daily_product_snapshots WHERE snapshot_date = _target_date) THEN
RAISE NOTICE 'Processing date: % [gap fill — no existing snapshot]', _target_date;
ELSE
RAISE NOTICE 'Processing date: % [stale data — snapshot aggregates mismatch source]', _target_date;
END IF;
-- IMPORTANT: First delete any existing data for this date to prevent duplication
DELETE FROM public.daily_product_snapshots
DELETE FROM public.daily_product_snapshots
WHERE snapshot_date = _target_date;
-- Proceed with calculating daily metrics only for products with actual activity

View File

@@ -0,0 +1,131 @@
-- Description: Populates lifecycle forecast columns on product_metrics from product_forecasts.
-- Runs AFTER update_product_metrics.sql so that lead time / days of stock settings are available.
-- Dependencies: product_metrics (fully populated), product_forecasts, settings tables.
-- Frequency: After each metrics run and/or after forecast engine runs.
DO $$
DECLARE
-- Module identifier used in NOTICE logging and as the key for the calculate_status row.
_module_name TEXT := 'lifecycle_forecasts';
-- clock_timestamp() (not now()) so the logged duration reflects actual wall-clock time
-- rather than the transaction start time.
_start_time TIMESTAMPTZ := clock_timestamp();
-- Row count of the most recent UPDATE, captured via GET DIAGNOSTICS for logging.
_updated INT;
BEGIN
RAISE NOTICE 'Running % module. Start Time: %', _module_name, _start_time;
-- Step 1: Set lifecycle_phase from product_forecasts (one phase per product)
-- DISTINCT ON (pid) with ORDER BY pid, forecast_date keeps, for each product,
-- the lifecycle_phase from the row with the EARLIEST forecast_date.
-- NOTE(review): if product_forecasts retains historical rows, that earliest row may
-- predate today — confirm "earliest" (vs. the phase as of CURRENT_DATE) is intended.
UPDATE product_metrics pm
SET lifecycle_phase = sub.lifecycle_phase
FROM (
SELECT DISTINCT ON (pid) pid, lifecycle_phase
FROM product_forecasts
ORDER BY pid, forecast_date
) sub
WHERE pm.pid = sub.pid
-- IS DISTINCT FROM is NULL-safe change detection: only rows whose phase actually
-- changes are written, keeping the update (and any triggers/WAL) minimal.
AND (pm.lifecycle_phase IS DISTINCT FROM sub.lifecycle_phase);
GET DIAGNOSTICS _updated = ROW_COUNT;
RAISE NOTICE 'Updated lifecycle_phase for % products', _updated;
-- Step 2: Compute lifecycle-based lead time and planning period forecasts
-- Uses each product's configured lead time and days of stock
-- lt_forecast = forecast units within the lead-time horizon;
-- pp_forecast = forecast units within lead time + days of stock (planning period).
-- CURRENT_DATE + <int> is Postgres date arithmetic (adds that many days).
WITH forecast_sums AS (
SELECT
pf.pid,
-- FILTER conditions may reference the joined, ungrouped s.* columns because
-- they are aggregate inputs, not grouped output columns.
SUM(pf.forecast_units) FILTER (
WHERE pf.forecast_date <= CURRENT_DATE + s.effective_lead_time
) AS lt_forecast,
SUM(pf.forecast_units) FILTER (
WHERE pf.forecast_date <= CURRENT_DATE + s.effective_lead_time + s.effective_days_of_stock
) AS pp_forecast
FROM product_forecasts pf
JOIN (
-- Settings precedence: per-product override, then vendor default, then the
-- global setting, then hard-coded fallbacks (14-day lead time, 30 days of stock).
SELECT
p.pid,
COALESCE(sp.lead_time_days, sv.default_lead_time_days,
(SELECT setting_value::int FROM settings_global WHERE setting_key = 'default_lead_time_days'), 14
) AS effective_lead_time,
COALESCE(sp.days_of_stock, sv.default_days_of_stock,
(SELECT setting_value::int FROM settings_global WHERE setting_key = 'default_days_of_stock'), 30
) AS effective_days_of_stock
FROM products p
LEFT JOIN settings_product sp ON p.pid = sp.pid
LEFT JOIN settings_vendor sv ON p.vendor = sv.vendor
) s ON s.pid = pf.pid
-- Only today-and-future forecast rows contribute to either horizon.
WHERE pf.forecast_date >= CURRENT_DATE
GROUP BY pf.pid
)
UPDATE product_metrics pm
SET
-- COALESCE: a product whose forecasts all fall outside a horizon gets 0, not NULL
-- (SUM ... FILTER over zero matching rows yields NULL).
lifecycle_lead_time_forecast = COALESCE(fs.lt_forecast, 0),
lifecycle_planning_period_forecast = COALESCE(fs.pp_forecast, 0)
FROM forecast_sums fs
-- NOTE(review): products with NO future forecast rows are absent from forecast_sums
-- and keep whatever values they already had — confirm stale values are acceptable
-- vs. resetting them to 0 here.
WHERE pm.pid = fs.pid
AND (pm.lifecycle_lead_time_forecast IS DISTINCT FROM COALESCE(fs.lt_forecast, 0)
OR pm.lifecycle_planning_period_forecast IS DISTINCT FROM COALESCE(fs.pp_forecast, 0));
GET DIAGNOSTICS _updated = ROW_COUNT;
RAISE NOTICE 'Updated lifecycle forecasts for % products', _updated;
-- Step 3: Reclassify demand_pattern using residual CV (de-trended)
-- For launch/decay products, raw CV is high because of expected lifecycle decay.
-- We subtract the expected brand curve value to get residuals, then compute CV on those.
-- Products that track their brand curve closely → low residual CV → "stable"
-- Products with erratic deviations from curve → higher residual CV → "variable"/"sporadic"
WITH product_curve AS (
-- Get each product's brand curve and age
-- Only launch/decay products with a known first-received date and a matching
-- brand-level curve (root_category IS NULL) are considered.
SELECT
pm.pid,
pm.lifecycle_phase,
pm.date_first_received,
blc.amplitude,
blc.decay_rate,
blc.baseline
FROM product_metrics pm
JOIN products p ON p.pid = pm.pid
LEFT JOIN brand_lifecycle_curves blc
ON blc.brand = pm.brand
AND blc.root_category IS NULL -- brand-only curve
WHERE pm.lifecycle_phase IN ('launch', 'decay')
AND pm.date_first_received IS NOT NULL
-- amplitude IS NOT NULL effectively converts the LEFT JOIN to an inner match.
AND blc.amplitude IS NOT NULL
),
daily_residuals AS (
-- Compute residual = actual - expected for each snapshot day
-- Curve params are in WEEKLY units; divide by 7 to get daily expected
-- Expected daily demand: (amplitude * e^(-decay_rate * age_in_weeks) + baseline) / 7.
-- (snapshot_date - date_first_received) is date-minus-date → integer days; /7.0 → weeks.
SELECT
dps.pid,
dps.units_sold,
(pc.amplitude * EXP(-pc.decay_rate * (dps.snapshot_date - pc.date_first_received)::numeric / 7.0) + pc.baseline) / 7.0 AS expected,
dps.units_sold - (pc.amplitude * EXP(-pc.decay_rate * (dps.snapshot_date - pc.date_first_received)::numeric / 7.0) + pc.baseline) / 7.0 AS residual
FROM daily_product_snapshots dps
JOIN product_curve pc ON pc.pid = dps.pid
-- 29-day lookback plus today = a 30-day inclusive window.
WHERE dps.snapshot_date >= CURRENT_DATE - INTERVAL '29 days'
AND dps.snapshot_date <= CURRENT_DATE
),
residual_cv AS (
SELECT
pid,
AVG(units_sold) AS avg_sales,
-- Guard rails: require at least 7 snapshot days and a non-trivial expected level
-- before computing a CV; GREATEST(..., 0.1) floors the denominator so near-zero
-- expected demand cannot blow the ratio up. Otherwise res_cv is NULL (no ELSE).
CASE WHEN COUNT(*) >= 7 AND AVG(ABS(expected)) > 0.01 THEN
STDDEV_POP(residual) / GREATEST(AVG(ABS(expected)), 0.1)
END AS res_cv
FROM daily_residuals
GROUP BY pid
)
UPDATE product_metrics pm
-- classify_demand_pattern is a project-defined function; presumably it maps
-- (avg daily sales, CV) to a pattern label — verify its contract before changing inputs.
SET demand_pattern = classify_demand_pattern(rc.avg_sales, rc.res_cv)
FROM residual_cv rc
WHERE pm.pid = rc.pid
-- Skip products whose residual CV could not be computed (insufficient data).
AND rc.res_cv IS NOT NULL
AND pm.demand_pattern IS DISTINCT FROM classify_demand_pattern(rc.avg_sales, rc.res_cv);
GET DIAGNOSTICS _updated = ROW_COUNT;
RAISE NOTICE 'Reclassified demand_pattern for % launch/decay products', _updated;
-- Update tracking
-- Upsert the module's last-run timestamp so schedulers/monitors can see freshness.
INSERT INTO public.calculate_status (module_name, last_calculation_timestamp)
VALUES (_module_name, clock_timestamp())
ON CONFLICT (module_name) DO UPDATE SET
last_calculation_timestamp = EXCLUDED.last_calculation_timestamp;
RAISE NOTICE '% module complete. Duration: %', _module_name, clock_timestamp() - _start_time;
END $$;