Optimize and fix calculate scripts

This commit is contained in:
2025-01-27 13:16:21 -05:00
parent 5781b45f37
commit 8323ae7703
10 changed files with 748 additions and 962 deletions

View File

@@ -15,6 +15,59 @@ async function calculateSalesForecasts(startTime, totalProducts, processedCount)
percentage: '98'
});
// First, create a temporary table for forecast dates
await connection.query(`
CREATE TEMPORARY TABLE IF NOT EXISTS temp_forecast_dates (
forecast_date DATE,
day_of_week INT,
month INT,
PRIMARY KEY (forecast_date)
)
`);
await connection.query(`
INSERT INTO temp_forecast_dates
SELECT
DATE_ADD(CURRENT_DATE, INTERVAL n DAY) as forecast_date,
DAYOFWEEK(DATE_ADD(CURRENT_DATE, INTERVAL n DAY)) as day_of_week,
MONTH(DATE_ADD(CURRENT_DATE, INTERVAL n DAY)) as month
FROM (
SELECT a.N + b.N * 10 as n
FROM
(SELECT 0 as N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION
SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) a,
(SELECT 0 as N UNION SELECT 1 UNION SELECT 2) b
ORDER BY n
LIMIT 31
) numbers
`);
// Create temporary table for daily sales stats
await connection.query(`
CREATE TEMPORARY TABLE IF NOT EXISTS temp_daily_sales AS
SELECT
o.pid,
DAYOFWEEK(o.date) as day_of_week,
SUM(o.quantity) as daily_quantity,
SUM(o.price * o.quantity) as daily_revenue,
COUNT(DISTINCT DATE(o.date)) as day_count
FROM orders o
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 90 DAY)
GROUP BY o.pid, DAYOFWEEK(o.date)
`);
// Create temporary table for product stats
await connection.query(`
CREATE TEMPORARY TABLE IF NOT EXISTS temp_product_stats AS
SELECT
pid,
AVG(daily_revenue) as overall_avg_revenue,
SUM(day_count) as total_days
FROM temp_daily_sales
GROUP BY pid
`);
// Calculate product-level forecasts
await connection.query(`
INSERT INTO sales_forecasts (
@@ -25,92 +78,37 @@ async function calculateSalesForecasts(startTime, totalProducts, processedCount)
confidence_level,
last_calculated_at
)
WITH daily_sales AS (
SELECT
o.pid,
DATE(o.date) as sale_date,
SUM(o.quantity) as daily_quantity,
SUM(o.price * o.quantity) as daily_revenue
FROM orders o
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 90 DAY)
GROUP BY o.pid, DATE(o.date)
),
forecast_dates AS (
SELECT
DATE_ADD(CURRENT_DATE, INTERVAL n DAY) as forecast_date
FROM (
SELECT 0 as n UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION
SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9 UNION
SELECT 10 UNION SELECT 11 UNION SELECT 12 UNION SELECT 13 UNION SELECT 14 UNION
SELECT 15 UNION SELECT 16 UNION SELECT 17 UNION SELECT 18 UNION SELECT 19 UNION
SELECT 20 UNION SELECT 21 UNION SELECT 22 UNION SELECT 23 UNION SELECT 24 UNION
SELECT 25 UNION SELECT 26 UNION SELECT 27 UNION SELECT 28 UNION SELECT 29 UNION
SELECT 30
) numbers
),
product_stats AS (
SELECT
ds.pid,
AVG(ds.daily_quantity) as avg_daily_quantity,
STDDEV_SAMP(ds.daily_quantity) as std_daily_quantity,
AVG(ds.daily_revenue) as avg_daily_revenue,
STDDEV_SAMP(ds.daily_revenue) as std_daily_revenue,
COUNT(*) as data_points,
-- Calculate day-of-week averages
AVG(CASE WHEN DAYOFWEEK(ds.sale_date) = 1 THEN ds.daily_revenue END) as sunday_avg,
AVG(CASE WHEN DAYOFWEEK(ds.sale_date) = 2 THEN ds.daily_revenue END) as monday_avg,
AVG(CASE WHEN DAYOFWEEK(ds.sale_date) = 3 THEN ds.daily_revenue END) as tuesday_avg,
AVG(CASE WHEN DAYOFWEEK(ds.sale_date) = 4 THEN ds.daily_revenue END) as wednesday_avg,
AVG(CASE WHEN DAYOFWEEK(ds.sale_date) = 5 THEN ds.daily_revenue END) as thursday_avg,
AVG(CASE WHEN DAYOFWEEK(ds.sale_date) = 6 THEN ds.daily_revenue END) as friday_avg,
AVG(CASE WHEN DAYOFWEEK(ds.sale_date) = 7 THEN ds.daily_revenue END) as saturday_avg
FROM daily_sales ds
GROUP BY ds.pid
)
SELECT
ps.pid,
ds.pid,
fd.forecast_date,
GREATEST(0,
ps.avg_daily_quantity *
(1 + COALESCE(
(SELECT seasonality_factor
FROM sales_seasonality
WHERE MONTH(fd.forecast_date) = month
LIMIT 1),
0
))
AVG(ds.daily_quantity) *
(1 + COALESCE(sf.seasonality_factor, 0))
) as forecast_units,
GREATEST(0,
CASE DAYOFWEEK(fd.forecast_date)
WHEN 1 THEN COALESCE(ps.sunday_avg, ps.avg_daily_revenue)
WHEN 2 THEN COALESCE(ps.monday_avg, ps.avg_daily_revenue)
WHEN 3 THEN COALESCE(ps.tuesday_avg, ps.avg_daily_revenue)
WHEN 4 THEN COALESCE(ps.wednesday_avg, ps.avg_daily_revenue)
WHEN 5 THEN COALESCE(ps.thursday_avg, ps.avg_daily_revenue)
WHEN 6 THEN COALESCE(ps.friday_avg, ps.avg_daily_revenue)
WHEN 7 THEN COALESCE(ps.saturday_avg, ps.avg_daily_revenue)
END *
(1 + COALESCE(
(SELECT seasonality_factor
FROM sales_seasonality
WHERE MONTH(fd.forecast_date) = month
LIMIT 1),
COALESCE(
CASE
WHEN SUM(ds.day_count) >= 4 THEN AVG(ds.daily_revenue)
ELSE ps.overall_avg_revenue
END *
(1 + COALESCE(sf.seasonality_factor, 0)) *
(0.95 + (RAND() * 0.1)),
0
)) *
-- Add some randomness within a small range (±5%)
(0.95 + (RAND() * 0.1))
)
) as forecast_revenue,
CASE
WHEN ps.data_points >= 60 THEN 90
WHEN ps.data_points >= 30 THEN 80
WHEN ps.data_points >= 14 THEN 70
WHEN ps.total_days >= 60 THEN 90
WHEN ps.total_days >= 30 THEN 80
WHEN ps.total_days >= 14 THEN 70
ELSE 60
END as confidence_level,
NOW() as last_calculated_at
FROM product_stats ps
CROSS JOIN forecast_dates fd
WHERE ps.avg_daily_quantity > 0
FROM temp_daily_sales ds
JOIN temp_product_stats ps ON ds.pid = ps.pid
CROSS JOIN temp_forecast_dates fd
LEFT JOIN sales_seasonality sf ON fd.month = sf.month
GROUP BY ds.pid, fd.forecast_date, ps.overall_avg_revenue, ps.total_days, sf.seasonality_factor
HAVING AVG(ds.daily_quantity) > 0
ON DUPLICATE KEY UPDATE
forecast_units = VALUES(forecast_units),
forecast_revenue = VALUES(forecast_revenue),
@@ -118,6 +116,32 @@ async function calculateSalesForecasts(startTime, totalProducts, processedCount)
last_calculated_at = NOW()
`);
// Create temporary table for category stats
await connection.query(`
CREATE TEMPORARY TABLE IF NOT EXISTS temp_category_sales AS
SELECT
pc.cat_id,
DAYOFWEEK(o.date) as day_of_week,
SUM(o.quantity) as daily_quantity,
SUM(o.price * o.quantity) as daily_revenue,
COUNT(DISTINCT DATE(o.date)) as day_count
FROM orders o
JOIN product_categories pc ON o.pid = pc.pid
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 90 DAY)
GROUP BY pc.cat_id, DAYOFWEEK(o.date)
`);
await connection.query(`
CREATE TEMPORARY TABLE IF NOT EXISTS temp_category_stats AS
SELECT
cat_id,
AVG(daily_revenue) as overall_avg_revenue,
SUM(day_count) as total_days
FROM temp_category_sales
GROUP BY cat_id
`);
// Calculate category-level forecasts
await connection.query(`
INSERT INTO category_forecasts (
@@ -128,93 +152,37 @@ async function calculateSalesForecasts(startTime, totalProducts, processedCount)
confidence_level,
last_calculated_at
)
WITH category_daily_sales AS (
SELECT
pc.cat_id as category_id,
DATE(o.date) as sale_date,
SUM(o.quantity) as daily_quantity,
SUM(o.price * o.quantity) as daily_revenue
FROM orders o
JOIN product_categories pc ON o.pid = pc.pid
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 90 DAY)
GROUP BY pc.cat_id, DATE(o.date)
),
forecast_dates AS (
SELECT
DATE_ADD(CURRENT_DATE, INTERVAL n DAY) as forecast_date
FROM (
SELECT 0 as n UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION
SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9 UNION
SELECT 10 UNION SELECT 11 UNION SELECT 12 UNION SELECT 13 UNION SELECT 14 UNION
SELECT 15 UNION SELECT 16 UNION SELECT 17 UNION SELECT 18 UNION SELECT 19 UNION
SELECT 20 UNION SELECT 21 UNION SELECT 22 UNION SELECT 23 UNION SELECT 24 UNION
SELECT 25 UNION SELECT 26 UNION SELECT 27 UNION SELECT 28 UNION SELECT 29 UNION
SELECT 30
) numbers
),
category_stats AS (
SELECT
cds.category_id,
AVG(cds.daily_quantity) as avg_daily_quantity,
STDDEV_SAMP(cds.daily_quantity) as std_daily_quantity,
AVG(cds.daily_revenue) as avg_daily_revenue,
STDDEV_SAMP(cds.daily_revenue) as std_daily_revenue,
COUNT(*) as data_points,
-- Calculate day-of-week averages
AVG(CASE WHEN DAYOFWEEK(cds.sale_date) = 1 THEN cds.daily_revenue END) as sunday_avg,
AVG(CASE WHEN DAYOFWEEK(cds.sale_date) = 2 THEN cds.daily_revenue END) as monday_avg,
AVG(CASE WHEN DAYOFWEEK(cds.sale_date) = 3 THEN cds.daily_revenue END) as tuesday_avg,
AVG(CASE WHEN DAYOFWEEK(cds.sale_date) = 4 THEN cds.daily_revenue END) as wednesday_avg,
AVG(CASE WHEN DAYOFWEEK(cds.sale_date) = 5 THEN cds.daily_revenue END) as thursday_avg,
AVG(CASE WHEN DAYOFWEEK(cds.sale_date) = 6 THEN cds.daily_revenue END) as friday_avg,
AVG(CASE WHEN DAYOFWEEK(cds.sale_date) = 7 THEN cds.daily_revenue END) as saturday_avg
FROM category_daily_sales cds
GROUP BY cds.category_id
)
SELECT
cs.category_id,
cs.cat_id as category_id,
fd.forecast_date,
GREATEST(0,
cs.avg_daily_quantity *
(1 + COALESCE(
(SELECT seasonality_factor
FROM sales_seasonality
WHERE MONTH(fd.forecast_date) = month
LIMIT 1),
0
))
AVG(cs.daily_quantity) *
(1 + COALESCE(sf.seasonality_factor, 0))
) as forecast_units,
GREATEST(0,
CASE DAYOFWEEK(fd.forecast_date)
WHEN 1 THEN COALESCE(cs.sunday_avg, cs.avg_daily_revenue)
WHEN 2 THEN COALESCE(cs.monday_avg, cs.avg_daily_revenue)
WHEN 3 THEN COALESCE(cs.tuesday_avg, cs.avg_daily_revenue)
WHEN 4 THEN COALESCE(cs.wednesday_avg, cs.avg_daily_revenue)
WHEN 5 THEN COALESCE(cs.thursday_avg, cs.avg_daily_revenue)
WHEN 6 THEN COALESCE(cs.friday_avg, cs.avg_daily_revenue)
WHEN 7 THEN COALESCE(cs.saturday_avg, cs.avg_daily_revenue)
END *
(1 + COALESCE(
(SELECT seasonality_factor
FROM sales_seasonality
WHERE MONTH(fd.forecast_date) = month
LIMIT 1),
COALESCE(
CASE
WHEN SUM(cs.day_count) >= 4 THEN AVG(cs.daily_revenue)
ELSE ct.overall_avg_revenue
END *
(1 + COALESCE(sf.seasonality_factor, 0)) *
(0.95 + (RAND() * 0.1)),
0
)) *
-- Add some randomness within a small range (±5%)
(0.95 + (RAND() * 0.1))
)
) as forecast_revenue,
CASE
WHEN cs.data_points >= 60 THEN 90
WHEN cs.data_points >= 30 THEN 80
WHEN cs.data_points >= 14 THEN 70
WHEN ct.total_days >= 60 THEN 90
WHEN ct.total_days >= 30 THEN 80
WHEN ct.total_days >= 14 THEN 70
ELSE 60
END as confidence_level,
NOW() as last_calculated_at
FROM category_stats cs
CROSS JOIN forecast_dates fd
WHERE cs.avg_daily_quantity > 0
FROM temp_category_sales cs
JOIN temp_category_stats ct ON cs.cat_id = ct.cat_id
CROSS JOIN temp_forecast_dates fd
LEFT JOIN sales_seasonality sf ON fd.month = sf.month
GROUP BY cs.cat_id, fd.forecast_date, ct.overall_avg_revenue, ct.total_days, sf.seasonality_factor
HAVING AVG(cs.daily_quantity) > 0
ON DUPLICATE KEY UPDATE
forecast_units = VALUES(forecast_units),
forecast_revenue = VALUES(forecast_revenue),
@@ -222,6 +190,15 @@ async function calculateSalesForecasts(startTime, totalProducts, processedCount)
last_calculated_at = NOW()
`);
// Clean up temporary tables
await connection.query(`
DROP TEMPORARY TABLE IF EXISTS temp_forecast_dates;
DROP TEMPORARY TABLE IF EXISTS temp_daily_sales;
DROP TEMPORARY TABLE IF EXISTS temp_product_stats;
DROP TEMPORARY TABLE IF EXISTS temp_category_sales;
DROP TEMPORARY TABLE IF EXISTS temp_category_stats;
`);
return Math.floor(totalProducts * 1.0);
} finally {
connection.release();