Fix probable discrepancies and errors in import/calculate scripts

2025-02-02 00:01:46 -05:00
parent bd5bcdd548
commit 8a43da502a
10 changed files with 423 additions and 218 deletions

View File

@@ -55,10 +55,21 @@ async function calculateBrandMetrics(startTime, totalProducts, processedCount, i
WITH filtered_products AS (
SELECT
p.*,
CASE WHEN p.stock_quantity <= 5000 THEN p.pid END as valid_pid,
CASE WHEN p.visible = true AND p.stock_quantity <= 5000 THEN p.pid END as active_pid,
CASE
WHEN p.stock_quantity IS NULL OR p.stock_quantity < 0 OR p.stock_quantity > 5000 THEN 0
WHEN p.stock_quantity <= 5000 AND p.stock_quantity >= 0
THEN p.pid
END as valid_pid,
CASE
WHEN p.visible = true
AND p.stock_quantity <= 5000
AND p.stock_quantity >= 0
THEN p.pid
END as active_pid,
CASE
WHEN p.stock_quantity IS NULL
OR p.stock_quantity < 0
OR p.stock_quantity > 5000
THEN 0
ELSE p.stock_quantity
END as valid_stock
FROM products p
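As a quick spot check of the 0-5000 validity window above (hypothetical quantities, MySQL syntax): missing, negative, and oversized stock values all collapse to a valid_stock of 0, while in-range values pass through unchanged.

    SELECT s.stock_quantity,
           CASE
               WHEN s.stock_quantity IS NULL
                 OR s.stock_quantity < 0
                 OR s.stock_quantity > 5000
               THEN 0
               ELSE s.stock_quantity
           END AS valid_stock
    FROM (SELECT CAST(NULL AS SIGNED) AS stock_quantity
          UNION ALL SELECT -3
          UNION ALL SELECT 120
          UNION ALL SELECT 9999) s;
    -- valid_stock: 0, 0, 120, 0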
@@ -67,10 +78,13 @@ async function calculateBrandMetrics(startTime, totalProducts, processedCount, i
sales_periods AS (
SELECT
p.brand,
SUM(o.quantity * o.price) as period_revenue,
SUM(o.quantity * (o.price - COALESCE(o.discount, 0))) as period_revenue,
SUM(o.quantity * (o.price - COALESCE(o.discount, 0) - p.cost_price)) as period_margin,
COUNT(DISTINCT DATE(o.date)) as period_days,
CASE
WHEN o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) THEN 'current'
WHEN o.date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 15 MONTH) AND DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH) THEN 'previous'
WHEN o.date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 15 MONTH)
AND DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH) THEN 'previous'
END as period_type
FROM filtered_products p
JOIN orders o ON p.pid = o.pid
@@ -86,10 +100,11 @@ async function calculateBrandMetrics(startTime, totalProducts, processedCount, i
SUM(p.valid_stock) as total_stock_units,
SUM(p.valid_stock * p.cost_price) as total_stock_cost,
SUM(p.valid_stock * p.price) as total_stock_retail,
COALESCE(SUM(o.quantity * o.price), 0) as total_revenue,
COALESCE(SUM(o.quantity * (o.price - COALESCE(o.discount, 0))), 0) as total_revenue,
CASE
WHEN SUM(o.quantity * o.price) > 0 THEN
(SUM((o.price - p.cost_price) * o.quantity) * 100.0) / SUM(o.price * o.quantity)
WHEN SUM(o.quantity * (o.price - COALESCE(o.discount, 0))) > 0
THEN (SUM(o.quantity * (o.price - COALESCE(o.discount, 0) - p.cost_price)) * 100.0) /
SUM(o.quantity * (o.price - COALESCE(o.discount, 0)))
ELSE 0
END as avg_margin
FROM filtered_products p
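Worked example of the discount-aware margin above, with hypothetical figures: 2 units sold at a 100.00 price with a 10.00 discount and a 60.00 cost give net revenue 180.00, net profit 60.00, and a margin of 33.33%.

    SELECT (SUM(qty * (price - discount - cost)) * 100.0) /
           SUM(qty * (price - discount)) AS avg_margin
    FROM (SELECT 2 AS qty, 100.00 AS price, 10.00 AS discount, 60.00 AS cost) t;
    -- avg_margin = 33.33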
@@ -107,16 +122,18 @@ async function calculateBrandMetrics(startTime, totalProducts, processedCount, i
bd.avg_margin,
CASE
WHEN MAX(CASE WHEN sp.period_type = 'previous' THEN sp.period_revenue END) = 0
AND MAX(CASE WHEN sp.period_type = 'current' THEN sp.period_revenue END) > 0 THEN 100.0
WHEN MAX(CASE WHEN sp.period_type = 'previous' THEN sp.period_revenue END) = 0 THEN 0.0
ELSE LEAST(
GREATEST(
AND MAX(CASE WHEN sp.period_type = 'current' THEN sp.period_revenue END) > 0
THEN 100.0
WHEN MAX(CASE WHEN sp.period_type = 'previous' THEN sp.period_revenue END) = 0
THEN 0.0
ELSE GREATEST(
-100.0,
LEAST(
((MAX(CASE WHEN sp.period_type = 'current' THEN sp.period_revenue END) -
MAX(CASE WHEN sp.period_type = 'previous' THEN sp.period_revenue END)) /
NULLIF(MAX(CASE WHEN sp.period_type = 'previous' THEN sp.period_revenue END), 0)) * 100.0,
-100.0
),
999.99
NULLIF(ABS(MAX(CASE WHEN sp.period_type = 'previous' THEN sp.period_revenue END)), 0)) * 100.0,
999.99
)
)
END as growth_rate
FROM brand_data bd
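The GREATEST(-100.0, LEAST(..., 999.99)) wrapper above keeps the period-over-period growth rate inside a [-100, 999.99] band, presumably matching the precision of the target column. A hypothetical check with previous revenue 40 and current revenue 500 (a raw +1150%):

    SELECT GREATEST(-100.0,
                    LEAST(((500 - 40) / NULLIF(ABS(40), 0)) * 100.0,
                          999.99)) AS growth_rate;
    -- the raw 1150% is clamped to 999.99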

View File

@@ -151,7 +151,9 @@ async function calculateCategoryMetrics(startTime, totalProducts, processedCount
WITH current_period AS (
SELECT
pc.cat_id,
SUM(o.quantity * o.price / (1 + COALESCE(ss.seasonality_factor, 0))) as revenue
SUM(o.quantity * (o.price - COALESCE(o.discount, 0)) /
(1 + COALESCE(ss.seasonality_factor, 0))) as revenue,
COUNT(DISTINCT DATE(o.date)) as days
FROM product_categories pc
JOIN products p ON pc.pid = p.pid
JOIN orders o ON p.pid = o.pid
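The division by (1 + seasonality_factor) de-seasonalizes each period's net revenue before comparison. With hypothetical factors, a month running 25% above baseline and a month running 5% below both reduce to the same underlying level:

    SELECT month, gross_revenue,
           gross_revenue / (1 + COALESCE(seasonality_factor, 0)) AS deseasonalized
    FROM (SELECT 'Dec' AS month, 1250.00 AS gross_revenue, 0.25 AS seasonality_factor
          UNION ALL SELECT 'Feb', 950.00, -0.05) t;
    -- Dec: 1000.00, Feb: 1000.00 (comparable once the seasonal swing is removed)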
@@ -163,7 +165,9 @@ async function calculateCategoryMetrics(startTime, totalProducts, processedCount
previous_period AS (
SELECT
pc.cat_id,
SUM(o.quantity * o.price / (1 + COALESCE(ss.seasonality_factor, 0))) as revenue
SUM(o.quantity * (o.price - COALESCE(o.discount, 0)) /
(1 + COALESCE(ss.seasonality_factor, 0))) as revenue,
COUNT(DISTINCT DATE(o.date)) as days
FROM product_categories pc
JOIN products p ON pc.pid = p.pid
JOIN orders o ON p.pid = o.pid
@@ -177,7 +181,8 @@ async function calculateCategoryMetrics(startTime, totalProducts, processedCount
SELECT
pc.cat_id,
MONTH(o.date) as month,
SUM(o.quantity * o.price / (1 + COALESCE(ss.seasonality_factor, 0))) as revenue,
SUM(o.quantity * (o.price - COALESCE(o.discount, 0)) /
(1 + COALESCE(ss.seasonality_factor, 0))) as revenue,
COUNT(DISTINCT DATE(o.date)) as days_in_month
FROM product_categories pc
JOIN products p ON pc.pid = p.pid
@@ -192,8 +197,8 @@ async function calculateCategoryMetrics(startTime, totalProducts, processedCount
cat_id,
COUNT(*) as n,
AVG(month) as avg_x,
AVG(revenue / days_in_month) as avg_y,
SUM(month * (revenue / days_in_month)) as sum_xy,
AVG(revenue / NULLIF(days_in_month, 0)) as avg_y,
SUM(month * (revenue / NULLIF(days_in_month, 0))) as sum_xy,
SUM(month * month) as sum_xx
FROM trend_data
GROUP BY cat_id
@@ -202,7 +207,8 @@ async function calculateCategoryMetrics(startTime, totalProducts, processedCount
trend_analysis AS (
SELECT
cat_id,
((n * sum_xy) - (avg_x * n * avg_y)) / ((n * sum_xx) - (n * avg_x * avg_x)) as trend_slope,
((n * sum_xy) - (avg_x * n * avg_y)) /
NULLIF((n * sum_xx) - (n * avg_x * avg_x), 0) as trend_slope,
avg_y as avg_daily_revenue
FROM trend_stats
)
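For reference, the trend_stats/trend_analysis pair appears to approximate a least-squares fit of average daily revenue against month number. The textbook closed-form slope, checked on three hypothetical points (1, 10), (2, 10), (3, 40):

    SELECT ((COUNT(*) * SUM(x * y)) - (SUM(x) * SUM(y))) /
           NULLIF((COUNT(*) * SUM(x * x)) - (SUM(x) * SUM(x)), 0) AS trend_slope
    FROM (SELECT 1 AS x, 10 AS y
          UNION ALL SELECT 2, 10
          UNION ALL SELECT 3, 40) pts;
    -- trend_slope = 15 (revenue-per-day change per month)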
@@ -213,24 +219,31 @@ async function calculateCategoryMetrics(startTime, totalProducts, processedCount
SET
cm.growth_rate = CASE
WHEN pp.revenue = 0 AND COALESCE(cp.revenue, 0) > 0 THEN 100.0
WHEN pp.revenue = 0 THEN 0.0
WHEN pp.revenue = 0 OR cp.revenue IS NULL THEN 0.0
WHEN ta.trend_slope IS NOT NULL THEN
LEAST(
GREATEST(
GREATEST(
-100.0,
LEAST(
(ta.trend_slope / NULLIF(ta.avg_daily_revenue, 0)) * 365 * 100,
-100.0
),
999.99
999.99
)
)
ELSE
LEAST(
GREATEST(
((COALESCE(cp.revenue, 0) - pp.revenue) / pp.revenue) * 100.0,
-100.0
),
999.99
GREATEST(
-100.0,
LEAST(
((COALESCE(cp.revenue, 0) - pp.revenue) /
NULLIF(ABS(pp.revenue), 0)) * 100.0,
999.99
)
)
END,
cm.avg_margin = CASE
WHEN cp.revenue > 0 THEN
(SUM(o.quantity * (o.price - COALESCE(o.discount, 0) - p.cost_price)) /
NULLIF(SUM(o.quantity * (o.price - COALESCE(o.discount, 0))), 0)) * 100
ELSE cm.avg_margin
END,
cm.last_calculated_at = NOW()
WHERE cp.cat_id IS NOT NULL OR pp.cat_id IS NOT NULL
`);
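When a slope is available, the growth rate above scales it by 365 * 100 relative to average daily revenue and then applies the same clamp as the brand-level formula. A hypothetical slope of 0.5 against an average daily revenue of 200 lands well inside the band:

    SELECT GREATEST(-100.0,
                    LEAST((0.5 / NULLIF(200, 0)) * 365 * 100,
                          999.99)) AS growth_rate;
    -- 0.5 / 200 * 365 * 100 = 91.25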

View File

@@ -286,6 +286,146 @@ async function calculateProductMetrics(startTime, totalProducts, processedCount
});
}
// Calculate ABC classification
outputProgress({
status: 'running',
operation: 'Starting ABC classification',
current: processedCount,
total: totalProducts,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, processedCount, totalProducts),
rate: calculateRate(startTime, processedCount),
percentage: ((processedCount / totalProducts) * 100).toFixed(1),
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
}
});
if (isCancelled) return processedCount;
const [abcConfig] = await connection.query('SELECT a_threshold, b_threshold FROM abc_classification_config WHERE id = 1');
const abcThresholds = abcConfig[0] || { a_threshold: 20, b_threshold: 50 };
// First, create and populate the rankings table with an index
await connection.query('DROP TEMPORARY TABLE IF EXISTS temp_revenue_ranks');
await connection.query(`
CREATE TEMPORARY TABLE temp_revenue_ranks (
pid BIGINT NOT NULL,
total_revenue DECIMAL(10,3),
rank_num INT,
dense_rank INT,
percentile DECIMAL(5,2),
total_count INT,
PRIMARY KEY (pid),
INDEX (rank_num),
INDEX (dense_rank),
INDEX (percentile)
) ENGINE=MEMORY
`);
// Calculate rankings with proper tie handling
await connection.query(`
INSERT INTO temp_revenue_ranks
WITH revenue_data AS (
SELECT
pid,
total_revenue,
COUNT(*) OVER () as total_count,
PERCENT_RANK() OVER (ORDER BY total_revenue DESC) * 100 as percentile,
RANK() OVER (ORDER BY total_revenue DESC) as rank_num,
DENSE_RANK() OVER (ORDER BY total_revenue DESC) as dense_rank
FROM product_metrics
WHERE total_revenue > 0
)
SELECT
pid,
total_revenue,
rank_num,
dense_rank,
percentile,
total_count
FROM revenue_data
`);
// Get total count for percentage calculation
const [rankingCount] = await connection.query('SELECT MAX(rank_num) as total_count FROM temp_revenue_ranks');
const totalCount = rankingCount[0].total_count || 1;
const max_rank = totalCount;
// Process updates in batches
let abcProcessedCount = 0;
const batchSize = 5000;
while (true) {
if (isCancelled) return processedCount;
// Get a batch of PIDs that need updating
const [pids] = await connection.query(`
SELECT pm.pid
FROM product_metrics pm
LEFT JOIN temp_revenue_ranks tr ON pm.pid = tr.pid
WHERE pm.abc_class IS NULL
OR pm.abc_class !=
CASE
WHEN tr.pid IS NULL THEN 'C'
WHEN tr.percentile <= ? THEN 'A'
WHEN tr.percentile <= ? THEN 'B'
ELSE 'C'
END
LIMIT ?
`, [abcThresholds.a_threshold, abcThresholds.b_threshold, batchSize]);
if (pids.length === 0) break;
await connection.query(`
UPDATE product_metrics pm
LEFT JOIN temp_revenue_ranks tr ON pm.pid = tr.pid
SET pm.abc_class =
CASE
WHEN tr.pid IS NULL THEN 'C'
WHEN tr.percentile <= ? THEN 'A'
WHEN tr.percentile <= ? THEN 'B'
ELSE 'C'
END,
pm.last_calculated_at = NOW()
WHERE pm.pid IN (?)
`, [abcThresholds.a_threshold, abcThresholds.b_threshold, pids.map(row => row.pid)]);
// Now update turnover rate with proper handling of zero inventory periods
await connection.query(`
UPDATE product_metrics pm
JOIN (
SELECT
o.pid,
SUM(o.quantity) as total_sold,
COUNT(DISTINCT DATE(o.date)) as active_days,
AVG(CASE
WHEN p.stock_quantity > 0 THEN p.stock_quantity
ELSE NULL
END) as avg_nonzero_stock
FROM orders o
JOIN products p ON o.pid = p.pid
WHERE o.canceled = false
AND o.date >= DATE_SUB(CURRENT_DATE, INTERVAL 90 DAY)
AND o.pid IN (?)
GROUP BY o.pid
) sales ON pm.pid = sales.pid
SET
pm.turnover_rate = CASE
WHEN sales.avg_nonzero_stock > 0 AND sales.active_days > 0
THEN LEAST(
(sales.total_sold / sales.avg_nonzero_stock) * (365.0 / sales.active_days),
999.99
)
ELSE 0
END,
pm.last_calculated_at = NOW()
WHERE pm.pid IN (?)
`, [pids.map(row => row.pid), pids.map(row => row.pid)]);
}
return processedCount;
} catch (error) {
logError(error, 'Error calculating product metrics');
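A self-contained illustration of how the percentile cutoffs map to ABC classes (MySQL 8+ window functions, hypothetical revenues, and the default a_threshold = 20 / b_threshold = 50 fallback used above):

    WITH revenue_data AS (
        SELECT pid, total_revenue,
               PERCENT_RANK() OVER (ORDER BY total_revenue DESC) * 100 AS percentile
        FROM (SELECT 1 AS pid, 500 AS total_revenue
              UNION ALL SELECT 2, 400
              UNION ALL SELECT 3, 300
              UNION ALL SELECT 4, 200
              UNION ALL SELECT 5, 100) t
    )
    SELECT pid, total_revenue, percentile,
           CASE
               WHEN percentile <= 20 THEN 'A'
               WHEN percentile <= 50 THEN 'B'
               ELSE 'C'
           END AS abc_class
    FROM revenue_data
    ORDER BY percentile;
    -- pid 1 -> A (percentile 0), pids 2-3 -> B (25, 50), pids 4-5 -> C (75, 100)

The turnover update that follows uses the same capping pattern: (units sold / average non-zero stock) * (365 / active days), limited to 999.99.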

View File

@@ -159,37 +159,72 @@ async function calculateSalesForecasts(startTime, totalProducts, processedCount,
confidence_level,
last_calculated_at
)
WITH daily_stats AS (
SELECT
ds.pid,
AVG(ds.daily_quantity) as avg_daily_qty,
STDDEV(ds.daily_quantity) as std_daily_qty,
COUNT(DISTINCT ds.day_count) as data_points,
SUM(ds.day_count) as total_days,
AVG(ds.daily_revenue) as avg_daily_revenue,
STDDEV(ds.daily_revenue) as std_daily_revenue,
MIN(ds.daily_quantity) as min_daily_qty,
MAX(ds.daily_quantity) as max_daily_qty,
AVG(ABS(ds.daily_quantity - LAG(ds.daily_quantity) OVER (PARTITION BY ds.pid ORDER BY ds.day_of_week))) as avg_daily_variance
FROM temp_daily_sales ds
GROUP BY ds.pid
HAVING AVG(ds.daily_quantity) > 0
)
SELECT
ds.pid,
fd.forecast_date,
GREATEST(0,
AVG(ds.daily_quantity) *
(1 + COALESCE(sf.seasonality_factor, 0))
ROUND(
ds.avg_daily_qty *
(1 + COALESCE(sf.seasonality_factor, 0)) *
CASE
WHEN ds.std_daily_qty / NULLIF(ds.avg_daily_qty, 0) > 1.5 THEN 0.85
WHEN ds.std_daily_qty / NULLIF(ds.avg_daily_qty, 0) > 1.0 THEN 0.9
WHEN ds.std_daily_qty / NULLIF(ds.avg_daily_qty, 0) > 0.5 THEN 0.95
ELSE 1.0
END,
2
)
) as forecast_units,
GREATEST(0,
COALESCE(
CASE
WHEN SUM(ds.day_count) >= 4 THEN AVG(ds.daily_revenue)
ELSE ps.overall_avg_revenue
END *
(1 + COALESCE(sf.seasonality_factor, 0)) *
(0.95 + (RAND() * 0.1)),
0
ROUND(
COALESCE(
CASE
WHEN ds.data_points >= 4 THEN ds.avg_daily_revenue
ELSE ps.overall_avg_revenue
END *
(1 + COALESCE(sf.seasonality_factor, 0)) *
CASE
WHEN ds.std_daily_revenue / NULLIF(ds.avg_daily_revenue, 0) > 1.5 THEN 0.85
WHEN ds.std_daily_revenue / NULLIF(ds.avg_daily_revenue, 0) > 1.0 THEN 0.9
WHEN ds.std_daily_revenue / NULLIF(ds.avg_daily_revenue, 0) > 0.5 THEN 0.95
ELSE 1.0
END,
0
),
2
)
) as forecast_revenue,
CASE
WHEN ps.total_days >= 60 THEN 90
WHEN ps.total_days >= 30 THEN 80
WHEN ps.total_days >= 14 THEN 70
WHEN ds.total_days >= 60 AND ds.avg_daily_variance / NULLIF(ds.avg_daily_qty, 0) < 0.5 THEN 90
WHEN ds.total_days >= 60 THEN 85
WHEN ds.total_days >= 30 AND ds.avg_daily_variance / NULLIF(ds.avg_daily_qty, 0) < 0.5 THEN 80
WHEN ds.total_days >= 30 THEN 75
WHEN ds.total_days >= 14 AND ds.avg_daily_variance / NULLIF(ds.avg_daily_qty, 0) < 0.5 THEN 70
WHEN ds.total_days >= 14 THEN 65
ELSE 60
END as confidence_level,
NOW() as last_calculated_at
FROM temp_daily_sales ds
FROM daily_stats ds
JOIN temp_product_stats ps ON ds.pid = ps.pid
CROSS JOIN temp_forecast_dates fd
LEFT JOIN sales_seasonality sf ON fd.month = sf.month
GROUP BY ds.pid, fd.forecast_date, ps.overall_avg_revenue, ps.total_days, sf.seasonality_factor
HAVING AVG(ds.daily_quantity) > 0
GROUP BY ds.pid, fd.forecast_date, ps.overall_avg_revenue, sf.seasonality_factor
ON DUPLICATE KEY UPDATE
forecast_units = VALUES(forecast_units),
forecast_revenue = VALUES(forecast_revenue),
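The CASE ladders above dampen forecasts for volatile products using the coefficient of variation (stddev / mean of the daily quantity or revenue). A hypothetical product averaging 12 units/day with a stddev of 15 and a +10% seasonality factor gets the 0.90 factor:

    SELECT ROUND(
               12 * (1 + COALESCE(0.10, 0)) *   -- avg daily qty, +10% seasonality
               CASE
                   WHEN 15 / NULLIF(12, 0) > 1.5 THEN 0.85
                   WHEN 15 / NULLIF(12, 0) > 1.0 THEN 0.90
                   WHEN 15 / NULLIF(12, 0) > 0.5 THEN 0.95
                   ELSE 1.0
               END,
               2) AS forecast_units;
    -- cv = 1.25, factor 0.90, forecast = 11.88 units/day

The confidence level then steps through 90/85/80/75/70/65/60 depending on history length and the variance-to-average ratio, as in the CASE shown above.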

View File

@@ -56,7 +56,7 @@ async function calculateTimeAggregates(startTime, totalProducts, processedCount,
inventory_value,
gmroi
)
WITH sales_data AS (
WITH monthly_sales AS (
SELECT
o.pid,
YEAR(o.date) as year,
@@ -66,19 +66,13 @@ async function calculateTimeAggregates(startTime, totalProducts, processedCount,
SUM(COALESCE(p.cost_price, 0) * o.quantity) as total_cost,
COUNT(DISTINCT o.order_number) as order_count,
AVG(o.price - COALESCE(o.discount, 0)) as avg_price,
CASE
WHEN SUM((o.price - COALESCE(o.discount, 0)) * o.quantity) = 0 THEN 0
ELSE ((SUM((o.price - COALESCE(o.discount, 0)) * o.quantity) -
SUM(COALESCE(p.cost_price, 0) * o.quantity)) /
SUM((o.price - COALESCE(o.discount, 0)) * o.quantity)) * 100
END as profit_margin,
p.cost_price * p.stock_quantity as inventory_value
COUNT(DISTINCT DATE(o.date)) as active_days
FROM orders o
JOIN products p ON o.pid = p.pid
WHERE o.canceled = 0
GROUP BY o.pid, YEAR(o.date), MONTH(o.date), p.cost_price, p.stock_quantity
WHERE o.canceled = false
GROUP BY o.pid, YEAR(o.date), MONTH(o.date)
),
purchase_data AS (
monthly_stock AS (
SELECT
pid,
YEAR(date) as year,