Make calculations incremental

This commit is contained in:
2025-02-09 13:35:44 -05:00
parent 2a6a0d0a87
commit 843ce71506
9 changed files with 935 additions and 1654 deletions

View File

@@ -4,19 +4,52 @@ const { getConnection } = require('./utils/db');
async function calculateCategoryMetrics(startTime, totalProducts, processedCount = 0, isCancelled = false) {
const connection = await getConnection();
let success = false;
let processedOrders = 0;
const BATCH_SIZE = 5000;
try {
// Get last calculation timestamp
const [lastCalc] = await connection.query(`
SELECT last_calculation_timestamp
FROM calculate_status
WHERE module_name = 'category_metrics'
`);
const lastCalculationTime = lastCalc[0]?.last_calculation_timestamp || '1970-01-01';
// Get total count of categories needing updates
const [categoryCount] = await connection.query(`
SELECT COUNT(DISTINCT c.cat_id) as count
FROM categories c
JOIN product_categories pc ON c.cat_id = pc.cat_id
LEFT JOIN products p ON pc.pid = p.pid AND p.updated > ?
LEFT JOIN orders o ON p.pid = o.pid AND o.updated > ?
WHERE c.status = 'active'
AND (
p.pid IS NOT NULL
OR o.id IS NOT NULL
)
`, [lastCalculationTime, lastCalculationTime]);
const totalCategories = categoryCount[0].count;
if (totalCategories === 0) {
console.log('No categories need metric updates');
return {
processedProducts: 0,
processedOrders: 0,
processedPurchaseOrders: 0,
success: true
};
}
if (isCancelled) {
outputProgress({
status: 'cancelled',
operation: 'Category metrics calculation cancelled',
current: processedCount,
total: totalProducts,
total: totalCategories,
elapsed: formatElapsedTime(startTime),
remaining: null,
rate: calculateRate(startTime, processedCount),
percentage: ((processedCount / totalProducts) * 100).toFixed(1),
percentage: ((processedCount / totalCategories) * 100).toFixed(1),
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
@@ -31,69 +64,15 @@ async function calculateCategoryMetrics(startTime, totalProducts, processedCount
};
}
// Get order count that will be processed
const [orderCount] = await connection.query(`
SELECT COUNT(*) as count
FROM orders o
WHERE o.canceled = false
`);
processedOrders = orderCount[0].count;
outputProgress({
status: 'running',
operation: 'Starting category metrics calculation',
current: processedCount,
total: totalProducts,
total: totalCategories,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, processedCount, totalProducts),
remaining: estimateRemaining(startTime, processedCount, totalCategories),
rate: calculateRate(startTime, processedCount),
percentage: ((processedCount / totalProducts) * 100).toFixed(1),
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
}
});
// First, calculate base category metrics
await connection.query(`
INSERT INTO category_metrics (
category_id,
product_count,
active_products,
total_value,
status,
last_calculated_at
)
SELECT
c.cat_id,
COUNT(DISTINCT p.pid) as product_count,
COUNT(DISTINCT CASE WHEN p.visible = true THEN p.pid END) as active_products,
COALESCE(SUM(p.stock_quantity * p.cost_price), 0) as total_value,
c.status,
NOW() as last_calculated_at
FROM categories c
LEFT JOIN product_categories pc ON c.cat_id = pc.cat_id
LEFT JOIN products p ON pc.pid = p.pid
GROUP BY c.cat_id, c.status
ON DUPLICATE KEY UPDATE
product_count = VALUES(product_count),
active_products = VALUES(active_products),
total_value = VALUES(total_value),
status = VALUES(status),
last_calculated_at = VALUES(last_calculated_at)
`);
processedCount = Math.floor(totalProducts * 0.90);
outputProgress({
status: 'running',
operation: 'Base category metrics calculated, updating with margin data',
current: processedCount,
total: totalProducts,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, processedCount, totalProducts),
rate: calculateRate(startTime, processedCount),
percentage: ((processedCount / totalProducts) * 100).toFixed(1),
percentage: ((processedCount / totalCategories) * 100).toFixed(1),
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
@@ -101,395 +80,99 @@ async function calculateCategoryMetrics(startTime, totalProducts, processedCount
}
});
if (isCancelled) return {
processedProducts: processedCount,
processedOrders,
processedPurchaseOrders: 0,
success
};
// Process in batches
let lastCatId = 0;
while (true) {
if (isCancelled) break;
// Then update with margin and turnover data
await connection.query(`
WITH category_sales AS (
SELECT
pc.cat_id,
SUM(o.quantity * o.price) as total_sales,
SUM(o.quantity * (o.price - p.cost_price)) as total_margin,
SUM(o.quantity) as units_sold,
AVG(GREATEST(p.stock_quantity, 0)) as avg_stock,
COUNT(DISTINCT DATE(o.date)) as active_days
FROM product_categories pc
JOIN products p ON pc.pid = p.pid
JOIN orders o ON p.pid = o.pid
LEFT JOIN turnover_config tc ON
(tc.category_id = pc.cat_id AND tc.vendor = p.vendor) OR
(tc.category_id = pc.cat_id AND tc.vendor IS NULL) OR
(tc.category_id IS NULL AND tc.vendor = p.vendor) OR
(tc.category_id IS NULL AND tc.vendor IS NULL)
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL COALESCE(tc.calculation_period_days, 30) DAY)
GROUP BY pc.cat_id
)
UPDATE category_metrics cm
JOIN category_sales cs ON cm.category_id = cs.cat_id
LEFT JOIN turnover_config tc ON
(tc.category_id = cm.category_id AND tc.vendor IS NULL) OR
(tc.category_id IS NULL AND tc.vendor IS NULL)
SET
cm.avg_margin = COALESCE(cs.total_margin * 100.0 / NULLIF(cs.total_sales, 0), 0),
cm.turnover_rate = CASE
WHEN cs.avg_stock > 0 AND cs.active_days > 0
THEN LEAST(
(cs.units_sold / cs.avg_stock) * (365.0 / cs.active_days),
999.99
)
ELSE 0
END,
cm.last_calculated_at = NOW()
`);
const [batch] = await connection.query(`
SELECT DISTINCT c.cat_id
FROM categories c
JOIN product_categories pc ON c.cat_id = pc.cat_id
LEFT JOIN products p ON pc.pid = p.pid AND p.updated > ?
LEFT JOIN orders o ON p.pid = o.pid AND o.updated > ?
WHERE c.status = 'active'
AND c.cat_id > ?
AND (
p.pid IS NOT NULL
OR o.id IS NOT NULL
)
ORDER BY c.cat_id
LIMIT ?
`, [lastCalculationTime, lastCalculationTime, lastCatId, BATCH_SIZE]);
processedCount = Math.floor(totalProducts * 0.95);
outputProgress({
status: 'running',
operation: 'Margin data updated, calculating growth rates',
current: processedCount,
total: totalProducts,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, processedCount, totalProducts),
rate: calculateRate(startTime, processedCount),
percentage: ((processedCount / totalProducts) * 100).toFixed(1),
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
}
});
if (batch.length === 0) break;
if (isCancelled) return {
processedProducts: processedCount,
processedOrders,
processedPurchaseOrders: 0,
success
};
// Update category metrics for this batch
await connection.query(`
INSERT INTO category_metrics (
category_id,
product_count,
active_products,
total_value,
avg_margin,
turnover_rate,
growth_rate,
status,
last_calculated_at
)
SELECT
c.cat_id,
COUNT(DISTINCT p.pid) as product_count,
COUNT(DISTINCT CASE WHEN p.visible = true THEN p.pid END) as active_products,
SUM(p.stock_quantity * p.cost_price) as total_value,
AVG(pm.avg_margin_percent) as avg_margin,
AVG(pm.turnover_rate) as turnover_rate,
((SUM(CASE
WHEN o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY)
THEN o.quantity * o.price
ELSE 0
END) / NULLIF(SUM(CASE
WHEN o.date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 60 DAY) AND DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY)
THEN o.quantity * o.price
ELSE 0
END), 0) - 1) * 100) as growth_rate,
c.status,
NOW() as last_calculated_at
FROM categories c
JOIN product_categories pc ON c.cat_id = pc.cat_id
LEFT JOIN products p ON pc.pid = p.pid
LEFT JOIN product_metrics pm ON p.pid = pm.pid
LEFT JOIN orders o ON p.pid = o.pid
AND o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 60 DAY)
WHERE c.cat_id IN (?)
GROUP BY c.cat_id, c.status
ON DUPLICATE KEY UPDATE
product_count = VALUES(product_count),
active_products = VALUES(active_products),
total_value = VALUES(total_value),
avg_margin = VALUES(avg_margin),
turnover_rate = VALUES(turnover_rate),
growth_rate = VALUES(growth_rate),
status = VALUES(status),
last_calculated_at = NOW()
`, [batch.map(row => row.cat_id)]);
// Finally update growth rates
await connection.query(`
WITH current_period AS (
SELECT
pc.cat_id,
SUM(o.quantity * (o.price - COALESCE(o.discount, 0)) /
(1 + COALESCE(ss.seasonality_factor, 0))) as revenue,
SUM(o.quantity * (o.price - COALESCE(o.discount, 0) - p.cost_price)) as gross_profit,
COUNT(DISTINCT DATE(o.date)) as days
FROM product_categories pc
JOIN products p ON pc.pid = p.pid
JOIN orders o ON p.pid = o.pid
LEFT JOIN sales_seasonality ss ON MONTH(o.date) = ss.month
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH)
GROUP BY pc.cat_id
),
previous_period AS (
SELECT
pc.cat_id,
SUM(o.quantity * (o.price - COALESCE(o.discount, 0)) /
(1 + COALESCE(ss.seasonality_factor, 0))) as revenue,
COUNT(DISTINCT DATE(o.date)) as days
FROM product_categories pc
JOIN products p ON pc.pid = p.pid
JOIN orders o ON p.pid = o.pid
LEFT JOIN sales_seasonality ss ON MONTH(o.date) = ss.month
WHERE o.canceled = false
AND o.date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 15 MONTH)
AND DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH)
GROUP BY pc.cat_id
),
trend_data AS (
SELECT
pc.cat_id,
MONTH(o.date) as month,
SUM(o.quantity * (o.price - COALESCE(o.discount, 0)) /
(1 + COALESCE(ss.seasonality_factor, 0))) as revenue,
COUNT(DISTINCT DATE(o.date)) as days_in_month
FROM product_categories pc
JOIN products p ON pc.pid = p.pid
JOIN orders o ON p.pid = o.pid
LEFT JOIN sales_seasonality ss ON MONTH(o.date) = ss.month
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 15 MONTH)
GROUP BY pc.cat_id, MONTH(o.date)
),
trend_stats AS (
SELECT
cat_id,
COUNT(*) as n,
AVG(month) as avg_x,
AVG(revenue / NULLIF(days_in_month, 0)) as avg_y,
SUM(month * (revenue / NULLIF(days_in_month, 0))) as sum_xy,
SUM(month * month) as sum_xx
FROM trend_data
GROUP BY cat_id
HAVING COUNT(*) >= 6
),
trend_analysis AS (
SELECT
cat_id,
((n * sum_xy) - (avg_x * n * avg_y)) /
NULLIF((n * sum_xx) - (n * avg_x * avg_x), 0) as trend_slope,
avg_y as avg_daily_revenue
FROM trend_stats
),
margin_calc AS (
SELECT
pc.cat_id,
CASE
WHEN SUM(o.quantity * o.price) > 0 THEN
GREATEST(
-100.0,
LEAST(
100.0,
(
SUM(o.quantity * o.price) - -- Use gross revenue (before discounts)
SUM(o.quantity * COALESCE(p.cost_price, 0)) -- Total costs
) * 100.0 /
NULLIF(SUM(o.quantity * o.price), 0) -- Divide by gross revenue
)
)
ELSE NULL
END as avg_margin
FROM product_categories pc
JOIN products p ON pc.pid = p.pid
JOIN orders o ON p.pid = o.pid
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH)
GROUP BY pc.cat_id
)
UPDATE category_metrics cm
LEFT JOIN current_period cp ON cm.category_id = cp.cat_id
LEFT JOIN previous_period pp ON cm.category_id = pp.cat_id
LEFT JOIN trend_analysis ta ON cm.category_id = ta.cat_id
LEFT JOIN margin_calc mc ON cm.category_id = mc.cat_id
SET
cm.growth_rate = CASE
WHEN pp.revenue = 0 AND COALESCE(cp.revenue, 0) > 0 THEN 100.0
WHEN pp.revenue = 0 OR cp.revenue IS NULL THEN 0.0
WHEN ta.trend_slope IS NOT NULL THEN
GREATEST(
-100.0,
LEAST(
(ta.trend_slope / NULLIF(ta.avg_daily_revenue, 0)) * 365 * 100,
999.99
)
)
ELSE
GREATEST(
-100.0,
LEAST(
((COALESCE(cp.revenue, 0) - pp.revenue) /
NULLIF(ABS(pp.revenue), 0)) * 100.0,
999.99
)
)
END,
cm.avg_margin = COALESCE(mc.avg_margin, cm.avg_margin),
cm.last_calculated_at = NOW()
WHERE cp.cat_id IS NOT NULL OR pp.cat_id IS NOT NULL
`);
lastCatId = batch[batch.length - 1].cat_id;
processedCount += batch.length;
processedCount = Math.floor(totalProducts * 0.97);
outputProgress({
status: 'running',
operation: 'Growth rates calculated, updating time-based metrics',
current: processedCount,
total: totalProducts,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, processedCount, totalProducts),
rate: calculateRate(startTime, processedCount),
percentage: ((processedCount / totalProducts) * 100).toFixed(1),
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
}
});
if (isCancelled) return {
processedProducts: processedCount,
processedOrders,
processedPurchaseOrders: 0,
success
};
// Calculate time-based metrics
await connection.query(`
INSERT INTO category_time_metrics (
category_id,
year,
month,
product_count,
active_products,
total_value,
total_revenue,
avg_margin,
turnover_rate
)
SELECT
pc.cat_id,
YEAR(o.date) as year,
MONTH(o.date) as month,
COUNT(DISTINCT p.pid) as product_count,
COUNT(DISTINCT CASE WHEN p.visible = true THEN p.pid END) as active_products,
SUM(p.stock_quantity * p.cost_price) as total_value,
SUM(o.quantity * o.price) as total_revenue,
CASE
WHEN SUM(o.quantity * o.price) > 0 THEN
LEAST(
GREATEST(
SUM(o.quantity * (o.price - GREATEST(p.cost_price, 0))) * 100.0 /
SUM(o.quantity * o.price),
-100
),
100
)
ELSE 0
END as avg_margin,
COALESCE(
LEAST(
SUM(o.quantity) / NULLIF(AVG(GREATEST(p.stock_quantity, 0)), 0),
999.99
),
0
) as turnover_rate
FROM product_categories pc
JOIN products p ON pc.pid = p.pid
JOIN orders o ON p.pid = o.pid
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH)
GROUP BY pc.cat_id, YEAR(o.date), MONTH(o.date)
ON DUPLICATE KEY UPDATE
product_count = VALUES(product_count),
active_products = VALUES(active_products),
total_value = VALUES(total_value),
total_revenue = VALUES(total_revenue),
avg_margin = VALUES(avg_margin),
turnover_rate = VALUES(turnover_rate)
`);
processedCount = Math.floor(totalProducts * 0.99);
outputProgress({
status: 'running',
operation: 'Time-based metrics calculated, updating category-sales metrics',
current: processedCount,
total: totalProducts,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, processedCount, totalProducts),
rate: calculateRate(startTime, processedCount),
percentage: ((processedCount / totalProducts) * 100).toFixed(1),
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
}
});
if (isCancelled) return {
processedProducts: processedCount,
processedOrders,
processedPurchaseOrders: 0,
success
};
// Calculate category-sales metrics
await connection.query(`
INSERT INTO category_sales_metrics (
category_id,
brand,
period_start,
period_end,
avg_daily_sales,
total_sold,
num_products,
avg_price,
last_calculated_at
)
WITH date_ranges AS (
SELECT
DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY) as period_start,
CURRENT_DATE as period_end
UNION ALL
SELECT
DATE_SUB(CURRENT_DATE, INTERVAL 90 DAY),
DATE_SUB(CURRENT_DATE, INTERVAL 31 DAY)
UNION ALL
SELECT
DATE_SUB(CURRENT_DATE, INTERVAL 180 DAY),
DATE_SUB(CURRENT_DATE, INTERVAL 91 DAY)
UNION ALL
SELECT
DATE_SUB(CURRENT_DATE, INTERVAL 365 DAY),
DATE_SUB(CURRENT_DATE, INTERVAL 181 DAY)
),
sales_data AS (
SELECT
pc.cat_id,
COALESCE(p.brand, 'Unknown') as brand,
dr.period_start,
dr.period_end,
COUNT(DISTINCT p.pid) as num_products,
SUM(o.quantity) as total_sold,
SUM(o.quantity * o.price) as total_revenue,
COUNT(DISTINCT DATE(o.date)) as num_days
FROM products p
JOIN product_categories pc ON p.pid = pc.pid
JOIN orders o ON p.pid = o.pid
CROSS JOIN date_ranges dr
WHERE o.canceled = false
AND o.date BETWEEN dr.period_start AND dr.period_end
GROUP BY pc.cat_id, p.brand, dr.period_start, dr.period_end
)
SELECT
cat_id as category_id,
brand,
period_start,
period_end,
CASE
WHEN num_days > 0
THEN total_sold / num_days
ELSE 0
END as avg_daily_sales,
total_sold,
num_products,
CASE
WHEN total_sold > 0
THEN total_revenue / total_sold
ELSE 0
END as avg_price,
NOW() as last_calculated_at
FROM sales_data
ON DUPLICATE KEY UPDATE
avg_daily_sales = VALUES(avg_daily_sales),
total_sold = VALUES(total_sold),
num_products = VALUES(num_products),
avg_price = VALUES(avg_price),
last_calculated_at = VALUES(last_calculated_at)
`);
processedCount = Math.floor(totalProducts * 1.0);
outputProgress({
status: 'running',
operation: 'Category-sales metrics calculated',
current: processedCount,
total: totalProducts,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, processedCount, totalProducts),
rate: calculateRate(startTime, processedCount),
percentage: ((processedCount / totalProducts) * 100).toFixed(1),
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
}
});
outputProgress({
status: 'running',
operation: 'Processing category metrics batch',
current: processedCount,
total: totalCategories,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, processedCount, totalCategories),
rate: calculateRate(startTime, processedCount),
percentage: ((processedCount / totalCategories) * 100).toFixed(1),
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
}
});
}
// If we get here, everything completed successfully
success = true;
@@ -503,7 +186,7 @@ async function calculateCategoryMetrics(startTime, totalProducts, processedCount
return {
processedProducts: processedCount,
processedOrders,
processedOrders: 0,
processedPurchaseOrders: 0,
success
};