Optimize metrics import and split off metrics import functions (untested)

This commit is contained in:
2025-01-11 14:52:47 -05:00
parent 30018ad882
commit eed032735d
7 changed files with 1082 additions and 254 deletions

View File

@@ -0,0 +1,359 @@
const mysql = require('mysql2/promise');
const path = require('path');
require('dotenv').config({ path: path.resolve(__dirname, '..', '.env') });
// Helper function to output progress
function outputProgress(data) {
console.log(JSON.stringify(data));
}
// Helper function to log errors
function logError(error, context) {
console.error(JSON.stringify({
status: 'error',
error: error.message || error,
context
}));
}
// Database configuration
const dbConfig = {
host: process.env.DB_HOST,
user: process.env.DB_USER,
password: process.env.DB_PASSWORD,
database: process.env.DB_NAME,
waitForConnections: true,
connectionLimit: 10,
queueLimit: 0
};
async function calculateMetrics() {
let pool;
try {
pool = mysql.createPool(dbConfig);
const connection = await pool.getConnection();
try {
// Create temporary tables for metrics calculations
outputProgress({
status: 'running',
operation: 'Creating temporary tables',
percentage: '0'
});
await connection.query(`
CREATE TABLE IF NOT EXISTS temp_sales_metrics (
product_id INT PRIMARY KEY,
total_quantity_sold INT DEFAULT 0,
total_revenue DECIMAL(10,2) DEFAULT 0.00,
average_price DECIMAL(10,2) DEFAULT 0.00,
last_sale_date DATE,
sales_rank INT
);
CREATE TABLE IF NOT EXISTS temp_purchase_metrics (
product_id INT PRIMARY KEY,
total_quantity_purchased INT DEFAULT 0,
total_cost DECIMAL(10,2) DEFAULT 0.00,
average_cost DECIMAL(10,2) DEFAULT 0.00,
last_purchase_date DATE,
purchase_rank INT
);
TRUNCATE TABLE temp_sales_metrics;
TRUNCATE TABLE temp_purchase_metrics;
`);
// Calculate sales metrics
outputProgress({
status: 'running',
operation: 'Calculating sales metrics',
percentage: '20'
});
await connection.query(`
INSERT INTO temp_sales_metrics (
product_id,
total_quantity_sold,
total_revenue,
average_price,
last_sale_date
)
SELECT
product_id,
SUM(quantity) as total_quantity_sold,
SUM((price - COALESCE(discount, 0)) * quantity) as total_revenue,
AVG(price - COALESCE(discount, 0)) as average_price,
MAX(date) as last_sale_date
FROM orders
WHERE canceled = 0
GROUP BY product_id;
UPDATE temp_sales_metrics
SET sales_rank = (
SELECT rank
FROM (
SELECT
product_id,
RANK() OVER (ORDER BY total_revenue DESC) as rank
FROM temp_sales_metrics
) rankings
WHERE rankings.product_id = temp_sales_metrics.product_id
);
`);
// Calculate purchase metrics
outputProgress({
status: 'running',
operation: 'Calculating purchase metrics',
percentage: '40'
});
await connection.query(`
INSERT INTO temp_purchase_metrics (
product_id,
total_quantity_purchased,
total_cost,
average_cost,
last_purchase_date
)
SELECT
product_id,
SUM(received) as total_quantity_purchased,
SUM(cost_price * received) as total_cost,
AVG(cost_price) as average_cost,
MAX(received_date) as last_purchase_date
FROM purchase_orders
WHERE status = 'closed' AND received > 0
GROUP BY product_id;
UPDATE temp_purchase_metrics
SET purchase_rank = (
SELECT rank
FROM (
SELECT
product_id,
RANK() OVER (ORDER BY total_cost DESC) as rank
FROM temp_purchase_metrics
) rankings
WHERE rankings.product_id = temp_purchase_metrics.product_id
);
`);
// Update product metrics
outputProgress({
status: 'running',
operation: 'Updating product metrics',
percentage: '60'
});
await connection.query(`
INSERT INTO product_metrics (
product_id,
total_quantity_sold,
total_revenue,
average_price,
total_quantity_purchased,
total_cost,
average_cost,
profit_margin,
turnover_rate,
last_sale_date,
last_purchase_date,
sales_rank,
purchase_rank,
last_calculated_at
)
SELECT
p.product_id,
COALESCE(s.total_quantity_sold, 0),
COALESCE(s.total_revenue, 0.00),
COALESCE(s.average_price, 0.00),
COALESCE(po.total_quantity_purchased, 0),
COALESCE(po.total_cost, 0.00),
COALESCE(po.average_cost, 0.00),
CASE
WHEN COALESCE(s.total_revenue, 0) = 0 THEN 0
ELSE ((s.total_revenue - po.total_cost) / s.total_revenue) * 100
END as profit_margin,
CASE
WHEN COALESCE(po.total_quantity_purchased, 0) = 0 THEN 0
ELSE (s.total_quantity_sold / po.total_quantity_purchased) * 100
END as turnover_rate,
s.last_sale_date,
po.last_purchase_date,
s.sales_rank,
po.purchase_rank,
NOW()
FROM products p
LEFT JOIN temp_sales_metrics s ON p.product_id = s.product_id
LEFT JOIN temp_purchase_metrics po ON p.product_id = po.product_id
ON DUPLICATE KEY UPDATE
total_quantity_sold = VALUES(total_quantity_sold),
total_revenue = VALUES(total_revenue),
average_price = VALUES(average_price),
total_quantity_purchased = VALUES(total_quantity_purchased),
total_cost = VALUES(total_cost),
average_cost = VALUES(average_cost),
profit_margin = VALUES(profit_margin),
turnover_rate = VALUES(turnover_rate),
last_sale_date = VALUES(last_sale_date),
last_purchase_date = VALUES(last_purchase_date),
sales_rank = VALUES(sales_rank),
purchase_rank = VALUES(purchase_rank),
last_calculated_at = VALUES(last_calculated_at);
`);
// Calculate ABC classification
outputProgress({
status: 'running',
operation: 'Calculating ABC classification',
percentage: '80'
});
await connection.query(`
WITH revenue_percentiles AS (
SELECT
product_id,
total_revenue,
PERCENT_RANK() OVER (ORDER BY total_revenue DESC) as revenue_percentile
FROM product_metrics
WHERE total_revenue > 0
)
UPDATE product_metrics pm
JOIN revenue_percentiles rp ON pm.product_id = rp.product_id
SET pm.abc_class =
CASE
WHEN rp.revenue_percentile < 0.2 THEN 'A'
WHEN rp.revenue_percentile < 0.5 THEN 'B'
ELSE 'C'
END;
`);
// Calculate time-based aggregates
outputProgress({
status: 'running',
operation: 'Calculating time aggregates',
percentage: '90'
});
await connection.query(`
TRUNCATE TABLE product_time_aggregates;
-- Daily aggregates
INSERT INTO product_time_aggregates (product_id, period_type, period_start, quantity_sold, revenue)
SELECT
product_id,
'daily' as period_type,
DATE(date) as period_start,
SUM(quantity) as quantity_sold,
SUM((price - COALESCE(discount, 0)) * quantity) as revenue
FROM orders
WHERE canceled = 0
GROUP BY product_id, DATE(date);
-- Weekly aggregates
INSERT INTO product_time_aggregates (product_id, period_type, period_start, quantity_sold, revenue)
SELECT
product_id,
'weekly' as period_type,
DATE(DATE_SUB(date, INTERVAL WEEKDAY(date) DAY)) as period_start,
SUM(quantity) as quantity_sold,
SUM((price - COALESCE(discount, 0)) * quantity) as revenue
FROM orders
WHERE canceled = 0
GROUP BY product_id, DATE(DATE_SUB(date, INTERVAL WEEKDAY(date) DAY));
-- Monthly aggregates
INSERT INTO product_time_aggregates (product_id, period_type, period_start, quantity_sold, revenue)
SELECT
product_id,
'monthly' as period_type,
DATE(DATE_SUB(date, INTERVAL DAY(date)-1 DAY)) as period_start,
SUM(quantity) as quantity_sold,
SUM((price - COALESCE(discount, 0)) * quantity) as revenue
FROM orders
WHERE canceled = 0
GROUP BY product_id, DATE(DATE_SUB(date, INTERVAL DAY(date)-1 DAY));
`);
// Calculate vendor metrics
outputProgress({
status: 'running',
operation: 'Calculating vendor metrics',
percentage: '95'
});
await connection.query(`
INSERT INTO vendor_metrics (
vendor,
total_orders,
total_items_ordered,
total_items_received,
total_spend,
average_order_value,
fulfillment_rate,
average_delivery_days,
last_order_date,
last_delivery_date
)
SELECT
vendor,
COUNT(DISTINCT po_id) as total_orders,
SUM(ordered) as total_items_ordered,
SUM(received) as total_items_received,
SUM(cost_price * received) as total_spend,
AVG(cost_price * ordered) as average_order_value,
(SUM(received) / NULLIF(SUM(ordered), 0)) * 100 as fulfillment_rate,
AVG(DATEDIFF(received_date, date)) as average_delivery_days,
MAX(date) as last_order_date,
MAX(received_date) as last_delivery_date
FROM purchase_orders
WHERE status = 'closed'
GROUP BY vendor
ON DUPLICATE KEY UPDATE
total_orders = VALUES(total_orders),
total_items_ordered = VALUES(total_items_ordered),
total_items_received = VALUES(total_items_received),
total_spend = VALUES(total_spend),
average_order_value = VALUES(average_order_value),
fulfillment_rate = VALUES(fulfillment_rate),
average_delivery_days = VALUES(average_delivery_days),
last_order_date = VALUES(last_order_date),
last_delivery_date = VALUES(last_delivery_date);
`);
outputProgress({
status: 'complete',
operation: 'Metrics calculation completed',
percentage: '100'
});
} catch (error) {
logError(error, 'Error calculating metrics');
throw error;
} finally {
connection.release();
}
} catch (error) {
logError(error, 'Fatal error during metrics calculation');
throw error;
} finally {
if (pool) {
await pool.end();
}
}
}
// Export the function if being required as a module
if (typeof module !== 'undefined' && module.exports) {
module.exports = calculateMetrics;
}
// Run directly if called from command line
if (require.main === module) {
calculateMetrics().catch(error => {
console.error('Error:', error);
process.exit(1);
});
}

View File

@@ -401,38 +401,75 @@ async function calculateVendorMetrics(connection) {
}
}
// Helper function to update product metrics
async function updateProductMetrics(connection, productId, startTime, current, total) {
// Helper function to calculate metrics in batches
async function calculateMetricsInBatch(connection) {
try {
// Calculate sales velocity metrics
const velocityMetrics = await calculateSalesVelocity(connection, productId);
// Calculate stock metrics
const stockMetrics = await calculateStockMetrics(connection, productId, velocityMetrics.daily_sales_avg);
// Calculate financial metrics
const financialMetrics = await calculateFinancialMetrics(connection, productId);
// Calculate purchase metrics
const purchaseMetrics = await calculatePurchaseMetrics(connection, productId);
// Update metrics in database
// Clear temporary tables
await connection.query('TRUNCATE TABLE temp_sales_metrics');
await connection.query('TRUNCATE TABLE temp_purchase_metrics');
// Calculate sales metrics for all products in one go
await connection.query(`
INSERT INTO temp_sales_metrics
SELECT
o.product_id,
COUNT(*) / NULLIF(DATEDIFF(MAX(o.date), MIN(o.date)), 0) as daily_sales_avg,
SUM(o.quantity) / NULLIF(DATEDIFF(MAX(o.date), MIN(o.date)), 0) * 7 as weekly_sales_avg,
SUM(o.quantity) / NULLIF(DATEDIFF(MAX(o.date), MIN(o.date)), 0) * 30 as monthly_sales_avg,
SUM(o.price * o.quantity) as total_revenue,
AVG((o.price - p.cost_price) / o.price * 100) as avg_margin_percent,
MIN(o.date) as first_sale_date,
MAX(o.date) as last_sale_date
FROM orders o
JOIN products p ON o.product_id = p.product_id
WHERE o.canceled = false
GROUP BY o.product_id
`);
// Calculate purchase metrics for all products in one go
await connection.query(`
INSERT INTO temp_purchase_metrics
SELECT
product_id,
AVG(DATEDIFF(received_date, date)) as avg_lead_time_days,
MAX(date) as last_purchase_date,
MAX(received_date) as last_received_date
FROM purchase_orders
WHERE status = 'closed'
GROUP BY product_id
`);
// Update product_metrics table with all metrics at once
await connection.query(`
INSERT INTO product_metrics (
product_id,
daily_sales_avg,
weekly_sales_avg,
monthly_sales_avg,
days_of_inventory,
weeks_of_inventory,
safety_stock,
reorder_point,
total_revenue,
avg_margin_percent,
avg_lead_time_days,
last_purchase_date,
last_received_date
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
product_id, daily_sales_avg, weekly_sales_avg, monthly_sales_avg,
days_of_inventory, weeks_of_inventory, safety_stock, reorder_point,
avg_margin_percent, total_revenue, avg_lead_time_days,
last_purchase_date, last_received_date
)
SELECT
p.product_id,
COALESCE(s.daily_sales_avg, 0),
COALESCE(s.weekly_sales_avg, 0),
COALESCE(s.monthly_sales_avg, 0),
CASE
WHEN s.daily_sales_avg > 0 THEN FLOOR(p.stock_quantity / s.daily_sales_avg)
ELSE 999
END as days_of_inventory,
CASE
WHEN s.daily_sales_avg > 0 THEN FLOOR(p.stock_quantity / s.daily_sales_avg / 7)
ELSE 999
END as weeks_of_inventory,
CEIL(COALESCE(s.daily_sales_avg, 0) * 14) as safety_stock,
CEIL(COALESCE(s.daily_sales_avg, 0) * 21) as reorder_point,
COALESCE(s.avg_margin_percent, 0),
COALESCE(s.total_revenue, 0),
COALESCE(pm.avg_lead_time_days, 0),
pm.last_purchase_date,
pm.last_received_date
FROM products p
LEFT JOIN temp_sales_metrics s ON p.product_id = s.product_id
LEFT JOIN temp_purchase_metrics pm ON p.product_id = pm.product_id
ON DUPLICATE KEY UPDATE
daily_sales_avg = VALUES(daily_sales_avg),
weekly_sales_avg = VALUES(weekly_sales_avg),
@@ -441,34 +478,37 @@ async function updateProductMetrics(connection, productId, startTime, current, t
weeks_of_inventory = VALUES(weeks_of_inventory),
safety_stock = VALUES(safety_stock),
reorder_point = VALUES(reorder_point),
total_revenue = VALUES(total_revenue),
avg_margin_percent = VALUES(avg_margin_percent),
total_revenue = VALUES(total_revenue),
avg_lead_time_days = VALUES(avg_lead_time_days),
last_purchase_date = VALUES(last_purchase_date),
last_received_date = VALUES(last_received_date)
`, [
productId,
velocityMetrics.daily_sales_avg,
velocityMetrics.weekly_sales_avg,
velocityMetrics.monthly_sales_avg,
stockMetrics?.days_of_inventory || 0,
stockMetrics?.weeks_of_inventory || 0,
stockMetrics?.safety_stock || 0,
stockMetrics?.reorder_point || 0,
financialMetrics.total_revenue,
financialMetrics.avg_margin_percent,
purchaseMetrics.avg_lead_time_days,
purchaseMetrics.last_purchase_date,
purchaseMetrics.last_received_date
]);
last_received_date = VALUES(last_received_date),
last_calculated_at = CURRENT_TIMESTAMP
`);
// Calculate ABC classification in one go
await connection.query(`
WITH revenue_ranks AS (
SELECT
product_id,
total_revenue,
total_revenue / SUM(total_revenue) OVER () * 100 as revenue_percent,
ROW_NUMBER() OVER (ORDER BY total_revenue DESC) as rank
FROM product_metrics
WHERE total_revenue > 0
)
UPDATE product_metrics pm
JOIN revenue_ranks r ON pm.product_id = r.product_id
SET abc_class =
CASE
WHEN r.revenue_percent >= 20 THEN 'A'
WHEN r.revenue_percent >= 5 THEN 'B'
ELSE 'C'
END
`);
// Output progress every 5 products or every second
if (current % 5 === 0 || Date.now() - startTime > 1000) {
updateProgress(current, total, 'Calculating product metrics', startTime);
startTime = Date.now();
}
} catch (error) {
logError(error, `Error updating metrics for product ${productId}`);
logError(error, 'Error in batch metrics calculation');
throw error;
}
}
@@ -1051,43 +1091,8 @@ async function main() {
const connection = await pool.getConnection();
try {
// Calculate product metrics
const [products] = await connection.query('SELECT DISTINCT product_id FROM products');
const totalProducts = products.length;
let processedProducts = 0;
const metricsStartTime = Date.now();
outputProgress({
operation: 'Starting product metrics calculation',
message: `Calculating metrics for ${totalProducts} products...`,
current: 0,
total: totalProducts,
percentage: '0'
});
for (const product of products) {
try {
// Update progress every 5 products or 1 second
if (processedProducts % 5 === 0 || (Date.now() - lastUpdate) > 1000) {
updateProgress(processedProducts, totalProducts, 'Calculating product metrics', metricsStartTime);
lastUpdate = Date.now();
}
await updateProductMetrics(connection, product.product_id, metricsStartTime, processedProducts, totalProducts);
processedProducts++;
} catch (error) {
logError(error, `Error calculating metrics for product ${product.product_id}`);
// Continue with next product instead of failing completely
}
}
outputProgress({
operation: 'Product metrics calculation completed',
current: processedProducts,
total: totalProducts,
duration: formatDuration((Date.now() - metricsStartTime) / 1000),
percentage: '100'
});
// Calculate metrics in batches
await calculateMetricsInBatch(connection);
// Calculate vendor metrics
await calculateVendorMetrics(connection);

View File

@@ -0,0 +1,170 @@
const mysql = require('mysql2/promise');
const path = require('path');
const dotenv = require('dotenv');
const fs = require('fs');
dotenv.config({ path: path.join(__dirname, '../.env') });
const dbConfig = {
host: process.env.DB_HOST,
user: process.env.DB_USER,
password: process.env.DB_PASSWORD,
database: process.env.DB_NAME,
multipleStatements: true,
waitForConnections: true,
connectionLimit: 10,
queueLimit: 0,
namedPlaceholders: true
};
// Set up logging
const LOG_DIR = path.join(__dirname, '../logs');
const ERROR_LOG = path.join(LOG_DIR, 'import-errors.log');
const IMPORT_LOG = path.join(LOG_DIR, 'import.log');
// Ensure log directory exists
if (!fs.existsSync(LOG_DIR)) {
fs.mkdirSync(LOG_DIR, { recursive: true });
}
// Helper function to log errors
function logError(error, context = '') {
const timestamp = new Date().toISOString();
const errorMessage = `[${timestamp}] ${context}\nError: ${error.message}\nStack: ${error.stack}\n\n`;
fs.appendFileSync(ERROR_LOG, errorMessage);
console.error(`\n${context}\nError: ${error.message}`);
}
// Helper function to log progress
function outputProgress(data) {
const timestamp = new Date().toISOString();
const logMessage = `[${timestamp}] ${JSON.stringify(data)}\n`;
fs.appendFileSync(IMPORT_LOG, logMessage);
console.log(JSON.stringify(data));
}
async function resetMetrics() {
let pool;
try {
pool = mysql.createPool(dbConfig);
const connection = await pool.getConnection();
try {
outputProgress({
status: 'running',
operation: 'Starting metrics reset',
message: 'Creating/resetting metrics tables...',
percentage: '0'
});
// Create tables if they don't exist and then truncate them
await connection.query(`
CREATE TABLE IF NOT EXISTS temp_sales_metrics (
product_id INT PRIMARY KEY,
total_quantity_sold INT DEFAULT 0,
total_revenue DECIMAL(10,2) DEFAULT 0.00,
average_price DECIMAL(10,2) DEFAULT 0.00,
last_sale_date DATE,
sales_rank INT
);
CREATE TABLE IF NOT EXISTS temp_purchase_metrics (
product_id INT PRIMARY KEY,
total_quantity_purchased INT DEFAULT 0,
total_cost DECIMAL(10,2) DEFAULT 0.00,
average_cost DECIMAL(10,2) DEFAULT 0.00,
last_purchase_date DATE,
purchase_rank INT
);
CREATE TABLE IF NOT EXISTS product_metrics (
product_id INT PRIMARY KEY,
total_quantity_sold INT DEFAULT 0,
total_revenue DECIMAL(10,2) DEFAULT 0.00,
average_price DECIMAL(10,2) DEFAULT 0.00,
total_quantity_purchased INT DEFAULT 0,
total_cost DECIMAL(10,2) DEFAULT 0.00,
average_cost DECIMAL(10,2) DEFAULT 0.00,
profit_margin DECIMAL(5,2) DEFAULT 0.00,
turnover_rate DECIMAL(5,2) DEFAULT 0.00,
abc_class CHAR(1),
last_calculated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
last_sale_date DATE,
last_purchase_date DATE,
sales_rank INT,
purchase_rank INT
);
CREATE TABLE IF NOT EXISTS product_time_aggregates (
product_id INT,
period_type ENUM('daily', 'weekly', 'monthly', 'quarterly', 'yearly'),
period_start DATE,
quantity_sold INT DEFAULT 0,
revenue DECIMAL(10,2) DEFAULT 0.00,
quantity_purchased INT DEFAULT 0,
purchase_cost DECIMAL(10,2) DEFAULT 0.00,
PRIMARY KEY (product_id, period_type, period_start)
);
CREATE TABLE IF NOT EXISTS vendor_metrics (
vendor VARCHAR(255) PRIMARY KEY,
total_orders INT DEFAULT 0,
total_items_ordered INT DEFAULT 0,
total_items_received INT DEFAULT 0,
total_spend DECIMAL(10,2) DEFAULT 0.00,
average_order_value DECIMAL(10,2) DEFAULT 0.00,
fulfillment_rate DECIMAL(5,2) DEFAULT 0.00,
average_delivery_days DECIMAL(5,1),
last_order_date DATE,
last_delivery_date DATE
);
TRUNCATE TABLE temp_sales_metrics;
TRUNCATE TABLE temp_purchase_metrics;
TRUNCATE TABLE product_metrics;
TRUNCATE TABLE product_time_aggregates;
TRUNCATE TABLE vendor_metrics;
`);
outputProgress({
status: 'complete',
operation: 'Metrics reset completed',
message: 'All metrics tables have been created/cleared',
percentage: '100'
});
} catch (error) {
logError(error, 'Error resetting metrics tables');
outputProgress({
status: 'error',
error: error.message,
operation: 'Failed to reset metrics'
});
throw error;
} finally {
connection.release();
}
} catch (error) {
logError(error, 'Fatal error during metrics reset');
outputProgress({
status: 'error',
error: error.message,
operation: 'Failed to reset metrics'
});
throw error;
} finally {
if (pool) {
await pool.end();
}
}
}
// Run if called directly
if (require.main === module) {
resetMetrics().catch(error => {
logError(error, 'Unhandled error in main process');
process.exit(1);
});
}
module.exports = resetMetrics;