Try to speed up calculate script + fixes

This commit is contained in:
2025-02-10 01:29:01 -05:00
parent 7ff757203f
commit 610e26689c
7 changed files with 520 additions and 260 deletions

View File

@@ -86,7 +86,8 @@ async function calculateSalesForecasts(startTime, totalProducts, processedCount
const [batch] = await connection.query(`
SELECT DISTINCT p.pid
FROM products p
LEFT JOIN orders o ON p.pid = o.pid AND o.updated > ?
FORCE INDEX (PRIMARY)
LEFT JOIN orders o FORCE INDEX (idx_orders_metrics) ON p.pid = o.pid AND o.updated > ?
WHERE p.visible = true
AND p.pid > ?
AND (
@@ -99,101 +100,160 @@ async function calculateSalesForecasts(startTime, totalProducts, processedCount
if (batch.length === 0) break;
// Calculate forecasts for this batch
// Create temporary tables for better performance
await connection.query('DROP TEMPORARY TABLE IF EXISTS temp_historical_sales');
await connection.query('DROP TEMPORARY TABLE IF EXISTS temp_sales_stats');
await connection.query('DROP TEMPORARY TABLE IF EXISTS temp_recent_trend');
await connection.query('DROP TEMPORARY TABLE IF EXISTS temp_confidence_calc');
// Create optimized temporary tables with indexes
await connection.query(`
INSERT INTO sales_forecasts (
pid,
forecast_date,
forecast_units,
forecast_revenue,
confidence_level,
last_calculated_at
)
WITH historical_sales AS (
SELECT
o.pid,
DATE(o.date) as sale_date,
SUM(o.quantity) as daily_quantity,
SUM(o.quantity * o.price) as daily_revenue
FROM orders o
WHERE o.canceled = false
AND o.pid IN (?)
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 180 DAY)
GROUP BY o.pid, DATE(o.date)
),
sales_stats AS (
SELECT
pid,
AVG(daily_quantity) as avg_daily_units,
AVG(daily_revenue) as avg_daily_revenue,
STDDEV(daily_quantity) as std_daily_units,
COUNT(*) as days_with_sales,
MIN(sale_date) as first_sale,
MAX(sale_date) as last_sale
FROM historical_sales
GROUP BY pid
),
recent_trend AS (
SELECT
h.pid,
AVG(h.daily_quantity) as recent_avg_units,
AVG(h.daily_revenue) as recent_avg_revenue
FROM historical_sales h
WHERE h.sale_date >= DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY)
GROUP BY h.pid
),
confidence_calc AS (
SELECT
s.pid,
LEAST(100, GREATEST(0, ROUND(
(s.days_with_sales / 180.0 * 50) + -- Up to 50 points for history length
(CASE
WHEN s.std_daily_units = 0 OR s.avg_daily_units = 0 THEN 0
WHEN (s.std_daily_units / s.avg_daily_units) <= 0.5 THEN 30
WHEN (s.std_daily_units / s.avg_daily_units) <= 1.0 THEN 20
WHEN (s.std_daily_units / s.avg_daily_units) <= 2.0 THEN 10
ELSE 0
END) + -- Up to 30 points for consistency
(CASE
WHEN DATEDIFF(CURRENT_DATE, s.last_sale) <= 7 THEN 20
WHEN DATEDIFF(CURRENT_DATE, s.last_sale) <= 30 THEN 10
ELSE 0
END) -- Up to 20 points for recency
))) as confidence_level
FROM sales_stats s
)
(SELECT
s.pid,
DATE_ADD(CURRENT_DATE, INTERVAL n.days DAY) as forecast_date,
GREATEST(0, ROUND(
CASE
WHEN s.days_with_sales >= n.days THEN
COALESCE(t.recent_avg_units, s.avg_daily_units)
ELSE s.avg_daily_units * (s.days_with_sales / n.days)
END
)) as forecast_units,
GREATEST(0, ROUND(
CASE
WHEN s.days_with_sales >= n.days THEN
COALESCE(t.recent_avg_revenue, s.avg_daily_revenue)
ELSE s.avg_daily_revenue * (s.days_with_sales / n.days)
END
, 2)) as forecast_revenue,
c.confidence_level,
NOW() as last_calculated_at
FROM sales_stats s
CROSS JOIN (
SELECT 30 as days UNION SELECT 60 UNION SELECT 90
) n
LEFT JOIN recent_trend t ON s.pid = t.pid
LEFT JOIN confidence_calc c ON s.pid = c.pid)
ON DUPLICATE KEY UPDATE
forecast_units = VALUES(forecast_units),
forecast_revenue = VALUES(forecast_revenue),
confidence_level = VALUES(confidence_level),
last_calculated_at = NOW()
CREATE TEMPORARY TABLE temp_historical_sales (
pid BIGINT NOT NULL,
sale_date DATE NOT NULL,
daily_quantity INT,
daily_revenue DECIMAL(15,2),
PRIMARY KEY (pid, sale_date),
INDEX (sale_date)
) ENGINE=MEMORY
`);
await connection.query(`
CREATE TEMPORARY TABLE temp_sales_stats (
pid BIGINT NOT NULL,
avg_daily_units DECIMAL(10,2),
avg_daily_revenue DECIMAL(15,2),
std_daily_units DECIMAL(10,2),
days_with_sales INT,
first_sale DATE,
last_sale DATE,
PRIMARY KEY (pid),
INDEX (days_with_sales),
INDEX (last_sale)
) ENGINE=MEMORY
`);
await connection.query(`
CREATE TEMPORARY TABLE temp_recent_trend (
pid BIGINT NOT NULL,
recent_avg_units DECIMAL(10,2),
recent_avg_revenue DECIMAL(15,2),
PRIMARY KEY (pid)
) ENGINE=MEMORY
`);
await connection.query(`
CREATE TEMPORARY TABLE temp_confidence_calc (
pid BIGINT NOT NULL,
confidence_level TINYINT,
PRIMARY KEY (pid)
) ENGINE=MEMORY
`);
// Populate historical sales with optimized index usage
await connection.query(`
INSERT INTO temp_historical_sales
SELECT
o.pid,
DATE(o.date) as sale_date,
SUM(o.quantity) as daily_quantity,
SUM(o.quantity * o.price) as daily_revenue
FROM orders o
FORCE INDEX (idx_orders_metrics)
WHERE o.canceled = false
AND o.pid IN (?)
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 180 DAY)
GROUP BY o.pid, DATE(o.date)
`, [batch.map(row => row.pid)]);
// Populate sales stats
await connection.query(`
INSERT INTO temp_sales_stats
SELECT
pid,
AVG(daily_quantity) as avg_daily_units,
AVG(daily_revenue) as avg_daily_revenue,
STDDEV(daily_quantity) as std_daily_units,
COUNT(*) as days_with_sales,
MIN(sale_date) as first_sale,
MAX(sale_date) as last_sale
FROM temp_historical_sales
GROUP BY pid
`);
// Populate recent trend
await connection.query(`
INSERT INTO temp_recent_trend
SELECT
h.pid,
AVG(h.daily_quantity) as recent_avg_units,
AVG(h.daily_revenue) as recent_avg_revenue
FROM temp_historical_sales h
WHERE h.sale_date >= DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY)
GROUP BY h.pid
`);
// Calculate confidence levels
await connection.query(`
INSERT INTO temp_confidence_calc
SELECT
s.pid,
LEAST(100, GREATEST(0, ROUND(
(s.days_with_sales / 180.0 * 50) + -- Up to 50 points for history length
(CASE
WHEN s.std_daily_units = 0 OR s.avg_daily_units = 0 THEN 0
WHEN (s.std_daily_units / s.avg_daily_units) <= 0.5 THEN 30
WHEN (s.std_daily_units / s.avg_daily_units) <= 1.0 THEN 20
WHEN (s.std_daily_units / s.avg_daily_units) <= 2.0 THEN 10
ELSE 0
END) + -- Up to 30 points for consistency
(CASE
WHEN DATEDIFF(CURRENT_DATE, s.last_sale) <= 7 THEN 20
WHEN DATEDIFF(CURRENT_DATE, s.last_sale) <= 30 THEN 10
ELSE 0
END) -- Up to 20 points for recency
))) as confidence_level
FROM temp_sales_stats s
`);
// Generate forecasts using temp tables
await connection.query(`
REPLACE INTO sales_forecasts
(pid, forecast_date, forecast_units, forecast_revenue, confidence_level, last_calculated_at)
SELECT
s.pid,
DATE_ADD(CURRENT_DATE, INTERVAL n.days DAY),
GREATEST(0, ROUND(
CASE
WHEN s.days_with_sales >= n.days THEN COALESCE(t.recent_avg_units, s.avg_daily_units)
ELSE s.avg_daily_units * (s.days_with_sales / n.days)
END
)),
GREATEST(0, ROUND(
CASE
WHEN s.days_with_sales >= n.days THEN COALESCE(t.recent_avg_revenue, s.avg_daily_revenue)
ELSE s.avg_daily_revenue * (s.days_with_sales / n.days)
END,
2
)),
c.confidence_level,
NOW()
FROM temp_sales_stats s
CROSS JOIN (
SELECT 30 as days
UNION SELECT 60
UNION SELECT 90
) n
LEFT JOIN temp_recent_trend t ON s.pid = t.pid
LEFT JOIN temp_confidence_calc c ON s.pid = c.pid;
`);
// Clean up temp tables
await connection.query('DROP TEMPORARY TABLE IF EXISTS temp_historical_sales');
await connection.query('DROP TEMPORARY TABLE IF EXISTS temp_sales_stats');
await connection.query('DROP TEMPORARY TABLE IF EXISTS temp_recent_trend');
await connection.query('DROP TEMPORARY TABLE IF EXISTS temp_confidence_calc');
lastPid = batch[batch.length - 1].pid;
processedCount += batch.length;