Files
inventory/inventory-server/scripts/metrics-new/backfill/rebuild_daily_snapshots.sql

152 lines
7.3 KiB
SQL

-- Description: Rebuilds daily product snapshots from scratch using real orders data.
-- Fixes issues with duplicated/inflated metrics.
-- Dependencies: Core import tables (products, orders, receivings).
-- Frequency: One-time run to clear out problematic data.
DO $$
DECLARE
_module_name TEXT := 'rebuild_daily_snapshots';
_start_time TIMESTAMPTZ := clock_timestamp();
_date DATE;
_count INT;
_total_records INT := 0;
_begin_date DATE := (SELECT MIN(date)::date FROM orders WHERE date >= '2024-01-01'); -- Starting point for data rebuild
_end_date DATE := CURRENT_DATE;
BEGIN
RAISE NOTICE 'Beginning daily snapshots rebuild from % to %. Starting at %', _begin_date, _end_date, _start_time;
-- First truncate the existing snapshots to ensure a clean slate
TRUNCATE TABLE public.daily_product_snapshots;
RAISE NOTICE 'Cleared existing snapshot data';
-- Now rebuild the snapshots day by day
_date := _begin_date;
WHILE _date <= _end_date LOOP
RAISE NOTICE 'Processing date %...', _date;
-- Create snapshots for this date
WITH SalesData AS (
SELECT
p.pid,
p.sku,
-- Count orders to ensure we only include products with real activity
COUNT(o.id) as order_count,
-- Aggregate Sales (Quantity > 0, Status not Canceled/Returned)
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN o.quantity ELSE 0 END), 0) AS units_sold,
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN o.price * o.quantity ELSE 0 END), 0.00) AS gross_revenue_unadjusted,
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN o.discount ELSE 0 END), 0.00) AS discounts,
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN COALESCE(o.costeach, p.landing_cost_price, p.cost_price) * o.quantity ELSE 0 END), 0.00) AS cogs,
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN p.regular_price * o.quantity ELSE 0 END), 0.00) AS gross_regular_revenue,
-- Aggregate Returns (Quantity < 0 or Status = Returned)
COALESCE(SUM(CASE WHEN o.quantity < 0 OR COALESCE(o.status, 'pending') = 'returned' THEN ABS(o.quantity) ELSE 0 END), 0) AS units_returned,
COALESCE(SUM(CASE WHEN o.quantity < 0 OR COALESCE(o.status, 'pending') = 'returned' THEN o.price * ABS(o.quantity) ELSE 0 END), 0.00) AS returns_revenue
FROM public.products p
LEFT JOIN public.orders o
ON p.pid = o.pid
AND o.date::date = _date
GROUP BY p.pid, p.sku
HAVING COUNT(o.id) > 0 -- Only include products with actual orders for this date
),
ReceivingData AS (
SELECT
r.pid,
-- Count receiving documents to ensure we only include products with real activity
COUNT(DISTINCT r.receiving_id) as receiving_count,
-- Calculate received quantity for this day
SUM(r.qty_each) AS units_received,
-- Calculate received cost for this day
SUM(r.qty_each * r.cost_each) AS cost_received
FROM public.receivings r
WHERE r.received_date::date = _date
GROUP BY r.pid
HAVING COUNT(DISTINCT r.receiving_id) > 0 OR SUM(r.qty_each) > 0
),
-- Get stock quantities for the day - note this is approximate since we're using current products data
StockData AS (
SELECT
p.pid,
p.stock_quantity,
COALESCE(p.landing_cost_price, p.cost_price, 0.00) as effective_cost_price,
COALESCE(p.price, 0.00) as current_price,
COALESCE(p.regular_price, 0.00) as current_regular_price
FROM public.products p
)
INSERT INTO public.daily_product_snapshots (
snapshot_date,
pid,
sku,
eod_stock_quantity,
eod_stock_cost,
eod_stock_retail,
eod_stock_gross,
stockout_flag,
units_sold,
units_returned,
gross_revenue,
discounts,
returns_revenue,
net_revenue,
cogs,
gross_regular_revenue,
profit,
units_received,
cost_received,
calculation_timestamp
)
SELECT
_date AS snapshot_date,
COALESCE(sd.pid, rd.pid) AS pid,
sd.sku,
-- Use current stock as approximation, since historical stock data may not be available
s.stock_quantity AS eod_stock_quantity,
s.stock_quantity * s.effective_cost_price AS eod_stock_cost,
s.stock_quantity * s.current_price AS eod_stock_retail,
s.stock_quantity * s.current_regular_price AS eod_stock_gross,
(s.stock_quantity <= 0) AS stockout_flag,
-- Sales metrics
COALESCE(sd.units_sold, 0),
COALESCE(sd.units_returned, 0),
COALESCE(sd.gross_revenue_unadjusted, 0.00),
COALESCE(sd.discounts, 0.00),
COALESCE(sd.returns_revenue, 0.00),
COALESCE(sd.gross_revenue_unadjusted, 0.00) - COALESCE(sd.discounts, 0.00) AS net_revenue,
COALESCE(sd.cogs, 0.00),
COALESCE(sd.gross_regular_revenue, 0.00),
(COALESCE(sd.gross_revenue_unadjusted, 0.00) - COALESCE(sd.discounts, 0.00)) - COALESCE(sd.cogs, 0.00) AS profit,
-- Receiving metrics
COALESCE(rd.units_received, 0),
COALESCE(rd.cost_received, 0.00),
_start_time
FROM SalesData sd
FULL OUTER JOIN ReceivingData rd ON sd.pid = rd.pid
LEFT JOIN StockData s ON COALESCE(sd.pid, rd.pid) = s.pid
WHERE (COALESCE(sd.order_count, 0) > 0 OR COALESCE(rd.receiving_count, 0) > 0);
-- Get record count for this day
GET DIAGNOSTICS _count = ROW_COUNT;
_total_records := _total_records + _count;
RAISE NOTICE 'Added % snapshot records for date %', _count, _date;
-- Move to next day
_date := _date + INTERVAL '1 day';
END LOOP;
RAISE NOTICE 'Rebuilding daily snapshots complete. Added % total records across % days.', _total_records, (_end_date - _begin_date)::integer + 1;
-- Update the status table for daily_snapshots
INSERT INTO public.calculate_status (module_name, last_calculation_timestamp)
VALUES ('daily_snapshots', _start_time)
ON CONFLICT (module_name) DO UPDATE SET last_calculation_timestamp = _start_time;
-- Now update product_metrics based on the rebuilt snapshots
RAISE NOTICE 'Triggering update of product_metrics table...';
-- Call the update_product_metrics procedure directly
-- Your system might use a different method to trigger this update
PERFORM pg_notify('recalculate_metrics', 'product_metrics');
RAISE NOTICE 'Rebuild complete. Duration: %', clock_timestamp() - _start_time;
END $$;