Fixes for metrics calculations
This commit is contained in:
@@ -1,75 +1,73 @@
|
||||
-- Description: Calculates and updates daily aggregated product data for recent days.
|
||||
-- Uses UPSERT (INSERT ON CONFLICT UPDATE) for idempotency.
|
||||
-- Description: Calculates and updates daily aggregated product data.
|
||||
-- Self-healing: automatically detects and fills gaps in snapshot history.
|
||||
-- Always reprocesses recent days to pick up new orders and data corrections.
|
||||
-- Dependencies: Core import tables (products, orders, purchase_orders), calculate_status table.
|
||||
-- Frequency: Hourly (Run ~5-10 minutes after hourly data import completes).
|
||||
|
||||
DO $$
|
||||
DECLARE
|
||||
_module_name TEXT := 'daily_snapshots';
|
||||
_start_time TIMESTAMPTZ := clock_timestamp(); -- Time execution started
|
||||
_last_calc_time TIMESTAMPTZ;
|
||||
_target_date DATE; -- Will be set in the loop
|
||||
_start_time TIMESTAMPTZ := clock_timestamp();
|
||||
_target_date DATE;
|
||||
_total_records INT := 0;
|
||||
_has_orders BOOLEAN := FALSE;
|
||||
_process_days INT := 5; -- Number of days to check/process (today plus previous 4 days)
|
||||
_day_counter INT;
|
||||
_missing_days INT[] := ARRAY[]::INT[]; -- Array to store days with missing or incomplete data
|
||||
_days_processed INT := 0;
|
||||
_max_backfill_days INT := 90; -- Safety cap: max days to backfill per run
|
||||
_recent_recheck_days INT := 2; -- Always reprocess this many recent days (today + yesterday)
|
||||
_latest_snapshot DATE;
|
||||
_backfill_start DATE;
|
||||
BEGIN
|
||||
-- Get the timestamp before the last successful run of this module
|
||||
SELECT last_calculation_timestamp INTO _last_calc_time
|
||||
FROM public.calculate_status
|
||||
WHERE module_name = _module_name;
|
||||
|
||||
RAISE NOTICE 'Running % script. Start Time: %', _module_name, _start_time;
|
||||
|
||||
-- First, check which days need processing by comparing orders data with snapshot data
|
||||
FOR _day_counter IN 0..(_process_days-1) LOOP
|
||||
_target_date := CURRENT_DATE - (_day_counter * INTERVAL '1 day');
|
||||
|
||||
-- Check if this date needs updating by comparing orders to snapshot data
|
||||
-- If the date has orders but not enough snapshots, or if snapshots show zero sales but orders exist, it's incomplete
|
||||
SELECT
|
||||
CASE WHEN (
|
||||
-- We have orders for this date but not enough snapshots, or snapshots with wrong total
|
||||
(EXISTS (SELECT 1 FROM public.orders WHERE date::date = _target_date) AND
|
||||
(
|
||||
-- No snapshots exist for this date
|
||||
NOT EXISTS (SELECT 1 FROM public.daily_product_snapshots WHERE snapshot_date = _target_date) OR
|
||||
-- Or snapshots show zero sales but orders exist
|
||||
(SELECT COALESCE(SUM(units_sold), 0) FROM public.daily_product_snapshots WHERE snapshot_date = _target_date) = 0 OR
|
||||
-- Or the count of snapshot records is significantly less than distinct products in orders
|
||||
(SELECT COUNT(*) FROM public.daily_product_snapshots WHERE snapshot_date = _target_date) <
|
||||
(SELECT COUNT(DISTINCT pid) FROM public.orders WHERE date::date = _target_date) * 0.8
|
||||
)
|
||||
)
|
||||
) THEN TRUE ELSE FALSE END
|
||||
INTO _has_orders;
|
||||
|
||||
IF _has_orders THEN
|
||||
-- This day needs processing - add to our array
|
||||
_missing_days := _missing_days || _day_counter;
|
||||
RAISE NOTICE 'Day % needs updating (incomplete or missing data)', _target_date;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
-- If no days need updating, exit early
|
||||
IF array_length(_missing_days, 1) IS NULL THEN
|
||||
RAISE NOTICE 'No days need updating - all snapshot data appears complete';
|
||||
|
||||
-- Still update the calculate_status to record this run
|
||||
UPDATE public.calculate_status
|
||||
SET last_calculation_timestamp = _start_time
|
||||
WHERE module_name = _module_name;
|
||||
|
||||
RETURN;
|
||||
END IF;
|
||||
|
||||
RAISE NOTICE 'Need to update % days with missing or incomplete data', array_length(_missing_days, 1);
|
||||
|
||||
-- Process only the days that need updating
|
||||
FOREACH _day_counter IN ARRAY _missing_days LOOP
|
||||
_target_date := CURRENT_DATE - (_day_counter * INTERVAL '1 day');
|
||||
RAISE NOTICE 'Processing date: %', _target_date;
|
||||
-- Find the latest existing snapshot date to determine where gaps begin
|
||||
SELECT MAX(snapshot_date) INTO _latest_snapshot
|
||||
FROM public.daily_product_snapshots;
|
||||
|
||||
-- Determine how far back to look for gaps, capped at _max_backfill_days
|
||||
_backfill_start := GREATEST(
|
||||
COALESCE(_latest_snapshot + 1, CURRENT_DATE - _max_backfill_days),
|
||||
CURRENT_DATE - _max_backfill_days
|
||||
);
|
||||
|
||||
IF _latest_snapshot IS NULL THEN
|
||||
RAISE NOTICE 'No existing snapshots found. Backfilling up to % days.', _max_backfill_days;
|
||||
ELSIF _backfill_start > _latest_snapshot + 1 THEN
|
||||
RAISE NOTICE 'Latest snapshot: %. Gap exceeds % day cap — backfilling from %. Use rebuild script for full history.',
|
||||
_latest_snapshot, _max_backfill_days, _backfill_start;
|
||||
ELSE
|
||||
RAISE NOTICE 'Latest snapshot: %. Checking for gaps from %.', _latest_snapshot, _backfill_start;
|
||||
END IF;
|
||||
|
||||
-- Process all dates that need snapshots:
|
||||
-- 1. Gap fill: dates with orders/receivings but no snapshots (older than recent window)
|
||||
-- 2. Recent recheck: last N days always reprocessed (picks up new orders, corrections)
|
||||
FOR _target_date IN
|
||||
SELECT d FROM (
|
||||
-- Gap fill: find dates with activity but missing snapshots
|
||||
SELECT activity_dates.d
|
||||
FROM (
|
||||
SELECT DISTINCT date::date AS d FROM public.orders
|
||||
WHERE date::date >= _backfill_start AND date::date < CURRENT_DATE - _recent_recheck_days
|
||||
UNION
|
||||
SELECT DISTINCT received_date::date AS d FROM public.receivings
|
||||
WHERE received_date::date >= _backfill_start AND received_date::date < CURRENT_DATE - _recent_recheck_days
|
||||
) activity_dates
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM public.daily_product_snapshots dps WHERE dps.snapshot_date = activity_dates.d
|
||||
)
|
||||
UNION
|
||||
-- Recent days: always reprocess
|
||||
SELECT d::date
|
||||
FROM generate_series(
|
||||
(CURRENT_DATE - _recent_recheck_days)::timestamp,
|
||||
CURRENT_DATE::timestamp,
|
||||
'1 day'::interval
|
||||
) d
|
||||
) dates_to_process
|
||||
ORDER BY d
|
||||
LOOP
|
||||
_days_processed := _days_processed + 1;
|
||||
RAISE NOTICE 'Processing date: % [%/%]', _target_date, _days_processed,
|
||||
_days_processed; -- count not known ahead of time, but shows progress
|
||||
|
||||
-- IMPORTANT: First delete any existing data for this date to prevent duplication
|
||||
DELETE FROM public.daily_product_snapshots
|
||||
@@ -90,7 +88,6 @@ BEGIN
|
||||
COALESCE(
|
||||
o.costeach, -- First use order-specific cost if available
|
||||
get_weighted_avg_cost(p.pid, o.date::date), -- Then use weighted average cost
|
||||
p.landing_cost_price, -- Fallback to landing cost
|
||||
p.cost_price -- Final fallback to current cost
|
||||
) * o.quantity
|
||||
ELSE 0 END), 0.00) AS cogs,
|
||||
@@ -128,7 +125,7 @@ BEGIN
|
||||
SELECT
|
||||
pid,
|
||||
stock_quantity,
|
||||
COALESCE(landing_cost_price, cost_price, 0.00) as effective_cost_price,
|
||||
COALESCE(cost_price, 0.00) as effective_cost_price,
|
||||
COALESCE(price, 0.00) as current_price,
|
||||
COALESCE(regular_price, 0.00) as current_regular_price
|
||||
FROM public.products
|
||||
@@ -181,7 +178,7 @@ BEGIN
|
||||
COALESCE(sd.gross_revenue_unadjusted, 0.00),
|
||||
COALESCE(sd.discounts, 0.00),
|
||||
COALESCE(sd.returns_revenue, 0.00),
|
||||
COALESCE(sd.gross_revenue_unadjusted, 0.00) - COALESCE(sd.discounts, 0.00) AS net_revenue,
|
||||
COALESCE(sd.gross_revenue_unadjusted, 0.00) - COALESCE(sd.discounts, 0.00) - COALESCE(sd.returns_revenue, 0.00) AS net_revenue,
|
||||
COALESCE(sd.cogs, 0.00),
|
||||
COALESCE(sd.gross_regular_revenue, 0.00),
|
||||
(COALESCE(sd.gross_revenue_unadjusted, 0.00) - COALESCE(sd.discounts, 0.00)) - COALESCE(sd.cogs, 0.00) AS profit, -- Basic profit: Net Revenue - COGS
|
||||
@@ -201,12 +198,18 @@ BEGIN
|
||||
RAISE NOTICE 'Created % daily snapshot records for % with sales/receiving activity', _total_records, _target_date;
|
||||
END LOOP;
|
||||
|
||||
-- Update the status table with the timestamp from the START of this run
|
||||
UPDATE public.calculate_status
|
||||
SET last_calculation_timestamp = _start_time
|
||||
WHERE module_name = _module_name;
|
||||
IF _days_processed = 0 THEN
|
||||
RAISE NOTICE 'No days need updating — all snapshot data is current.';
|
||||
ELSE
|
||||
RAISE NOTICE 'Processed % days total.', _days_processed;
|
||||
END IF;
|
||||
|
||||
RAISE NOTICE 'Finished % processing for multiple dates. Duration: %', _module_name, clock_timestamp() - _start_time;
|
||||
-- Update the status table with the timestamp from the START of this run
|
||||
INSERT INTO public.calculate_status (module_name, last_calculation_timestamp)
|
||||
VALUES (_module_name, _start_time)
|
||||
ON CONFLICT (module_name) DO UPDATE SET last_calculation_timestamp = _start_time;
|
||||
|
||||
RAISE NOTICE 'Finished % script. Duration: %', _module_name, clock_timestamp() - _start_time;
|
||||
|
||||
END $$;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user