Finish fixing calculate scripts

This commit is contained in:
2025-03-26 14:22:08 -04:00
parent 749907bd30
commit 8e19e6cd74
7 changed files with 295 additions and 375 deletions

View File

@@ -10,12 +10,13 @@ function sanitizeValue(value) {
}
async function calculateProductMetrics(startTime, totalProducts, processedCount = 0, isCancelled = false) {
const connection = await getConnection();
let connection;
let success = false;
let processedOrders = 0;
const BATCH_SIZE = 5000;
try {
connection = await getConnection();
// Skip flags are inherited from the parent scope
const SKIP_PRODUCT_BASE_METRICS = 0;
const SKIP_PRODUCT_TIME_AGGREGATES = 0;
@@ -147,33 +148,58 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
GROUP BY p.pid
`);
// Populate temp_purchase_metrics
await connection.query(`
INSERT INTO temp_purchase_metrics
SELECT
p.pid,
AVG(
CASE
WHEN po.received_date IS NOT NULL AND po.date IS NOT NULL
THEN EXTRACT(EPOCH FROM (po.received_date::timestamp with time zone - po.date::timestamp with time zone)) / 86400.0
ELSE NULL
END
) as avg_lead_time_days,
MAX(po.date) as last_purchase_date,
MIN(po.received_date) as first_received_date,
MAX(po.received_date) as last_received_date
FROM products p
LEFT JOIN purchase_orders po ON p.pid = po.pid
AND po.received_date IS NOT NULL
AND po.date >= CURRENT_DATE - INTERVAL '365 days'
GROUP BY p.pid
`);
// Populate temp_purchase_metrics with timeout protection
await Promise.race([
connection.query(`
INSERT INTO temp_purchase_metrics
SELECT
p.pid,
AVG(
CASE
WHEN po.received_date IS NOT NULL AND po.date IS NOT NULL
THEN EXTRACT(EPOCH FROM (po.received_date::timestamp with time zone - po.date::timestamp with time zone)) / 86400.0
ELSE NULL
END
) as avg_lead_time_days,
MAX(po.date) as last_purchase_date,
MIN(po.received_date) as first_received_date,
MAX(po.received_date) as last_received_date
FROM products p
LEFT JOIN purchase_orders po ON p.pid = po.pid
AND po.received_date IS NOT NULL
AND po.date IS NOT NULL
AND po.date >= CURRENT_DATE - INTERVAL '365 days'
GROUP BY p.pid
`),
new Promise((_, reject) =>
setTimeout(() => reject(new Error('Timeout: temp_purchase_metrics query took too long')), 60000)
)
]).catch(async (err) => {
logError(err, 'Error populating temp_purchase_metrics, continuing with empty table');
// Create an empty fallback to continue processing
await connection.query(`
INSERT INTO temp_purchase_metrics
SELECT
p.pid,
30.0 as avg_lead_time_days,
NULL as last_purchase_date,
NULL as first_received_date,
NULL as last_received_date
FROM products p
LEFT JOIN temp_purchase_metrics tpm ON p.pid = tpm.pid
WHERE tpm.pid IS NULL
`);
});
// Process updates in batches
let lastPid = 0;
while (true) {
let batchCount = 0;
const MAX_BATCHES = 1000; // Safety limit for number of batches to prevent infinite loops
while (batchCount < MAX_BATCHES) {
if (isCancelled) break;
batchCount++;
const batch = await connection.query(
'SELECT pid FROM products WHERE pid > $1 ORDER BY pid LIMIT $2',
[lastPid, BATCH_SIZE]
@@ -181,6 +207,7 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
if (batch.rows.length === 0) break;
// Process the entire batch in a single efficient query
await connection.query(`
UPDATE product_metrics pm
SET
@@ -241,6 +268,7 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
LEFT JOIN temp_sales_metrics sm ON p.pid = sm.pid
LEFT JOIN temp_purchase_metrics lm ON p.pid = lm.pid
WHERE p.pid = ANY($11::bigint[])
AND pm.pid = p.pid
`,
[
defaultThresholds.low_stock_threshold,
@@ -254,8 +282,7 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
defaultThresholds.overstock_days,
defaultThresholds.overstock_days,
batch.rows.map(row => row.pid)
]
);
]);
lastPid = batch.rows[batch.rows.length - 1].pid;
processedCount += batch.rows.length;
@@ -277,54 +304,59 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
});
}
// Calculate forecast accuracy and bias in batches
lastPid = 0;
while (true) {
if (isCancelled) break;
const batch = await connection.query(
'SELECT pid FROM products WHERE pid > $1 ORDER BY pid LIMIT $2',
[lastPid, BATCH_SIZE]
);
if (batch.rows.length === 0) break;
await connection.query(`
UPDATE product_metrics pm
SET
forecast_accuracy = GREATEST(0, 100 - LEAST(fa.avg_forecast_error, 100)),
forecast_bias = GREATEST(-100, LEAST(fa.avg_forecast_bias, 100)),
last_forecast_date = fa.last_forecast_date,
last_calculated_at = NOW()
FROM (
SELECT
sf.pid,
AVG(CASE
WHEN o.quantity > 0
THEN ABS(sf.forecast_quantity - o.quantity) / o.quantity * 100
ELSE 100
END) as avg_forecast_error,
AVG(CASE
WHEN o.quantity > 0
THEN (sf.forecast_quantity - o.quantity) / o.quantity * 100
ELSE 0
END) as avg_forecast_bias,
MAX(sf.forecast_date) as last_forecast_date
FROM sales_forecasts sf
JOIN orders o ON sf.pid = o.pid
AND DATE(o.date) = sf.forecast_date
WHERE o.canceled = false
AND sf.forecast_date >= CURRENT_DATE - INTERVAL '90 days'
AND sf.pid = ANY($1::bigint[])
GROUP BY sf.pid
) fa
WHERE pm.pid = fa.pid
`, [batch.rows.map(row => row.pid)]);
lastPid = batch.rows[batch.rows.length - 1].pid;
// Add safety check if the loop processed MAX_BATCHES
if (batchCount >= MAX_BATCHES) {
logError(new Error(`Reached maximum batch count (${MAX_BATCHES}). Process may have entered an infinite loop.`), 'Batch processing safety limit reached');
}
}
// Calculate forecast accuracy and bias in batches
lastPid = 0;
while (true) {
if (isCancelled) break;
const batch = await connection.query(
'SELECT pid FROM products WHERE pid > $1 ORDER BY pid LIMIT $2',
[lastPid, BATCH_SIZE]
);
if (batch.rows.length === 0) break;
await connection.query(`
UPDATE product_metrics pm
SET
forecast_accuracy = GREATEST(0, 100 - LEAST(fa.avg_forecast_error, 100)),
forecast_bias = GREATEST(-100, LEAST(fa.avg_forecast_bias, 100)),
last_forecast_date = fa.last_forecast_date,
last_calculated_at = NOW()
FROM (
SELECT
sf.pid,
AVG(CASE
WHEN o.quantity > 0
THEN ABS(sf.forecast_quantity - o.quantity) / o.quantity * 100
ELSE 100
END) as avg_forecast_error,
AVG(CASE
WHEN o.quantity > 0
THEN (sf.forecast_quantity - o.quantity) / o.quantity * 100
ELSE 0
END) as avg_forecast_bias,
MAX(sf.forecast_date) as last_forecast_date
FROM sales_forecasts sf
JOIN orders o ON sf.pid = o.pid
AND DATE(o.date) = sf.forecast_date
WHERE o.canceled = false
AND sf.forecast_date >= CURRENT_DATE - INTERVAL '90 days'
AND sf.pid = ANY($1::bigint[])
GROUP BY sf.pid
) fa
WHERE pm.pid = fa.pid
`, [batch.rows.map(row => row.pid)]);
lastPid = batch.rows[batch.rows.length - 1].pid;
}
// Calculate product time aggregates
if (!SKIP_PRODUCT_TIME_AGGREGATES) {
outputProgress({
@@ -360,11 +392,11 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
)
SELECT
p.pid,
EXTRACT(YEAR FROM o.date) as year,
EXTRACT(MONTH FROM o.date) as month,
EXTRACT(YEAR FROM o.date::timestamp with time zone) as year,
EXTRACT(MONTH FROM o.date::timestamp with time zone) as month,
SUM(o.quantity) as total_quantity_sold,
SUM(o.quantity * o.price) as total_revenue,
SUM(o.quantity * p.cost_price) as total_cost,
SUM(o.price * o.quantity) as total_revenue,
SUM(p.cost_price * o.quantity) as total_cost,
COUNT(DISTINCT o.order_number) as order_count,
AVG(o.price) as avg_price,
CASE
@@ -381,7 +413,7 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
FROM products p
LEFT JOIN orders o ON p.pid = o.pid AND o.canceled = false
WHERE o.date >= CURRENT_DATE - INTERVAL '12 months'
GROUP BY p.pid, EXTRACT(YEAR FROM o.date), EXTRACT(MONTH FROM o.date)
GROUP BY p.pid, EXTRACT(YEAR FROM o.date::timestamp with time zone), EXTRACT(MONTH FROM o.date::timestamp with time zone)
ON CONFLICT (pid, year, month) DO UPDATE
SET
total_quantity_sold = EXCLUDED.total_quantity_sold,
@@ -500,17 +532,18 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
// Get total count for percentage calculation
const rankingCount = await connection.query('SELECT MAX(rank_num) as total_count FROM temp_revenue_ranks');
const totalCount = parseInt(rankingCount.rows[0].total_count) || 1;
const max_rank = totalCount;
// Process updates in batches
let abcProcessedCount = 0;
const batchSize = 5000;
const maxPid = await connection.query('SELECT MAX(pid) as max_pid FROM products');
const maxProductId = parseInt(maxPid.rows[0].max_pid);
while (true) {
while (abcProcessedCount < maxProductId) {
if (isCancelled) return {
processedProducts: processedCount,
processedOrders,
processedPurchaseOrders: 0, // This module doesn't process POs
processedPurchaseOrders: 0,
success
};
@@ -519,16 +552,18 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
SELECT pm.pid
FROM product_metrics pm
LEFT JOIN temp_revenue_ranks tr ON pm.pid = tr.pid
WHERE pm.abc_class IS NULL
OR pm.abc_class !=
CASE
WHEN tr.pid IS NULL THEN 'C'
WHEN tr.percentile <= $1 THEN 'A'
WHEN tr.percentile <= $2 THEN 'B'
ELSE 'C'
END
LIMIT $3
`, [abcThresholds.a_threshold, abcThresholds.b_threshold, batchSize]);
WHERE pm.pid > $1
AND (pm.abc_class IS NULL
OR pm.abc_class !=
CASE
WHEN tr.pid IS NULL THEN 'C'
WHEN tr.percentile <= $2 THEN 'A'
WHEN tr.percentile <= $3 THEN 'B'
ELSE 'C'
END)
ORDER BY pm.pid
LIMIT $4
`, [abcProcessedCount, abcThresholds.a_threshold, abcThresholds.b_threshold, batchSize]);
if (pids.rows.length === 0) break;
@@ -581,10 +616,10 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
WHERE pm.pid = sales.pid
`, [pidValues]);
abcProcessedCount += pids.rows.length;
abcProcessedCount = pids.rows[pids.rows.length - 1].pid;
// Calculate progress proportionally to batch size
processedCount = Math.floor(totalProducts * (0.60 + (abcProcessedCount / totalProducts) * 0.2));
// Calculate progress proportionally to total products
processedCount = Math.floor(totalProducts * (0.60 + (abcProcessedCount / maxProductId) * 0.2));
outputProgress({
status: 'running',
@@ -626,7 +661,16 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
logError(error, 'Error calculating product metrics');
throw error;
} finally {
// Always clean up temporary tables, even if an error occurred
if (connection) {
try {
await connection.query('DROP TABLE IF EXISTS temp_sales_metrics');
await connection.query('DROP TABLE IF EXISTS temp_purchase_metrics');
} catch (err) {
console.error('Error cleaning up temporary tables:', err);
}
// Make sure to release the connection
connection.release();
}
}