Clean up old historical data calculations and scripts; optimize the metrics calculations so they only update rows whose values have actually changed

2025-06-18 15:13:31 -04:00
parent dd82c624d8
commit a97819f4a6
8 changed files with 61 additions and 53 deletions
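Every metrics script touched below gets the same optimization: its ON CONFLICT ... DO UPDATE upsert gains a WHERE clause that compares the stored row with the incoming EXCLUDED values, so PostgreSQL only rewrites rows whose values actually changed. A minimal sketch of the pattern, using hypothetical example_metrics/staged_metrics names rather than anything from the repository:

-- Hypothetical tables; the real statements in this commit target brand_metrics,
-- category_metrics, vendor_metrics, and product_metrics.
INSERT INTO example_metrics (pid, sales_30d, revenue_30d)
SELECT pid, sales_30d, revenue_30d FROM staged_metrics
ON CONFLICT (pid) DO UPDATE SET
    sales_30d   = EXCLUDED.sales_30d,
    revenue_30d = EXCLUDED.revenue_30d
WHERE -- skip the write entirely when nothing changed
    example_metrics.sales_30d   IS DISTINCT FROM EXCLUDED.sales_30d OR
    example_metrics.revenue_30d IS DISTINCT FROM EXCLUDED.revenue_30d;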

View File

@@ -6,7 +6,6 @@ const importCategories = require('./import/categories');
const { importProducts } = require('./import/products');
const importOrders = require('./import/orders');
const importPurchaseOrders = require('./import/purchase-orders');
- const importHistoricalData = require('./import/historical-data');
dotenv.config({ path: path.join(__dirname, "../.env") });
@@ -15,7 +14,6 @@ const IMPORT_CATEGORIES = true;
const IMPORT_PRODUCTS = true;
const IMPORT_ORDERS = true;
const IMPORT_PURCHASE_ORDERS = true;
- const IMPORT_HISTORICAL_DATA = false;
// Add flag for incremental updates
const INCREMENTAL_UPDATE = process.env.INCREMENTAL_UPDATE !== 'false'; // Default to true unless explicitly set to false
@@ -80,8 +78,7 @@ async function main() {
IMPORT_CATEGORIES,
IMPORT_PRODUCTS,
IMPORT_ORDERS,
- IMPORT_PURCHASE_ORDERS,
- IMPORT_HISTORICAL_DATA
+ IMPORT_PURCHASE_ORDERS
].filter(Boolean).length;
try {
@@ -129,11 +126,10 @@ async function main() {
'categories_enabled', $2::boolean,
'products_enabled', $3::boolean,
'orders_enabled', $4::boolean,
- 'purchase_orders_enabled', $5::boolean,
- 'historical_data_enabled', $6::boolean
+ 'purchase_orders_enabled', $5::boolean
)
) RETURNING id
- `, [INCREMENTAL_UPDATE, IMPORT_CATEGORIES, IMPORT_PRODUCTS, IMPORT_ORDERS, IMPORT_PURCHASE_ORDERS, IMPORT_HISTORICAL_DATA]);
+ `, [INCREMENTAL_UPDATE, IMPORT_CATEGORIES, IMPORT_PRODUCTS, IMPORT_ORDERS, IMPORT_PURCHASE_ORDERS]);
importHistoryId = historyResult.rows[0].id;
} catch (error) {
console.error("Error creating import history record:", error);
@@ -150,8 +146,7 @@ async function main() {
categories: null,
products: null,
orders: null,
- purchaseOrders: null,
- historicalData: null
+ purchaseOrders: null
};
let totalRecordsAdded = 0;
@@ -211,32 +206,6 @@ async function main() {
}
}
- if (IMPORT_HISTORICAL_DATA) {
- try {
- results.historicalData = await importHistoricalData(prodConnection, localConnection, INCREMENTAL_UPDATE);
- if (isImportCancelled) throw new Error("Import cancelled");
- completedSteps++;
- console.log('Historical data import result:', results.historicalData);
- // Handle potential error status
- if (results.historicalData?.status === 'error') {
- console.error('Historical data import had an error:', results.historicalData.error);
- } else {
- totalRecordsAdded += parseInt(results.historicalData?.recordsAdded || 0);
- totalRecordsUpdated += parseInt(results.historicalData?.recordsUpdated || 0);
- }
- } catch (error) {
- console.error('Error during historical data import:', error);
- // Continue with other imports, don't fail the whole process
- results.historicalData = {
- status: 'error',
- error: error.message,
- recordsAdded: 0,
- recordsUpdated: 0
- };
- }
- }
const endTime = Date.now();
const totalElapsedSeconds = Math.round((endTime - startTime) / 1000);
@@ -254,14 +223,12 @@ async function main() {
'products_enabled', $5::boolean,
'orders_enabled', $6::boolean,
'purchase_orders_enabled', $7::boolean,
- 'historical_data_enabled', $8::boolean,
- 'categories_result', COALESCE($9::jsonb, 'null'::jsonb),
- 'products_result', COALESCE($10::jsonb, 'null'::jsonb),
- 'orders_result', COALESCE($11::jsonb, 'null'::jsonb),
- 'purchase_orders_result', COALESCE($12::jsonb, 'null'::jsonb),
- 'historical_data_result', COALESCE($13::jsonb, 'null'::jsonb)
+ 'categories_result', COALESCE($8::jsonb, 'null'::jsonb),
+ 'products_result', COALESCE($9::jsonb, 'null'::jsonb),
+ 'orders_result', COALESCE($10::jsonb, 'null'::jsonb),
+ 'purchase_orders_result', COALESCE($11::jsonb, 'null'::jsonb)
)
- WHERE id = $14
+ WHERE id = $12
`, [
totalElapsedSeconds,
parseInt(totalRecordsAdded),
@@ -270,12 +237,10 @@ async function main() {
IMPORT_PRODUCTS,
IMPORT_ORDERS,
IMPORT_PURCHASE_ORDERS,
- IMPORT_HISTORICAL_DATA,
JSON.stringify(results.categories),
JSON.stringify(results.products),
JSON.stringify(results.orders),
JSON.stringify(results.purchaseOrders),
- JSON.stringify(results.historicalData),
importHistoryId
]);

View File

@@ -1,961 +0,0 @@
const { outputProgress, formatElapsedTime, estimateRemaining, calculateRate } = require('../metrics-new/utils/progress');
const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream');
const { promisify } = require('util');
// Configuration constants to control which tables get imported
const IMPORT_PRODUCT_CURRENT_PRICES = false;
const IMPORT_DAILY_INVENTORY = false;
const IMPORT_PRODUCT_STAT_HISTORY = true;
// For product stat history, limit to more recent data for faster initial import
const USE_RECENT_MONTHS = 12; // Just use the most recent months for product_stat_history
/**
* Validates a date from MySQL before inserting it into PostgreSQL
* @param {string|Date|null} mysqlDate - Date string or object from MySQL
* @returns {string|null} Valid date string or null if invalid
*/
function validateDate(mysqlDate) {
// Handle null, undefined, or empty values
if (!mysqlDate) {
return null;
}
// Convert to string if it's not already
const dateStr = String(mysqlDate);
// Handle MySQL zero dates and empty values
if (dateStr === '0000-00-00' ||
dateStr === '0000-00-00 00:00:00' ||
dateStr.indexOf('0000-00-00') !== -1 ||
dateStr === '') {
return null;
}
// Check if the date is valid
const date = new Date(mysqlDate);
// If the date is invalid or suspiciously old (pre-1970), return null
if (isNaN(date.getTime()) || date.getFullYear() < 1970) {
return null;
}
return mysqlDate;
}
/**
* Imports historical data from MySQL to PostgreSQL
*/
async function importHistoricalData(
prodConnection,
localConnection,
options = {}
) {
const {
incrementalUpdate = true,
oneYearAgo = new Date(new Date().setFullYear(new Date().getFullYear() - 1))
} = options;
const oneYearAgoStr = oneYearAgo.toISOString().split('T')[0];
const startTime = Date.now();
// Use larger batch sizes to improve performance
const BATCH_SIZE = 5000; // For fetching from small tables
const INSERT_BATCH_SIZE = 500; // For inserting to small tables
const LARGE_BATCH_SIZE = 10000; // For fetching from large tables
const LARGE_INSERT_BATCH_SIZE = 1000; // For inserting to large tables
// Calculate date for recent data
const recentDateStr = new Date(
new Date().setMonth(new Date().getMonth() - USE_RECENT_MONTHS)
).toISOString().split('T')[0];
console.log(`Starting import with:
- One year ago date: ${oneYearAgoStr}
- Recent months date: ${recentDateStr} (for product_stat_history)
- Incremental update: ${incrementalUpdate}
- Standard batch size: ${BATCH_SIZE}
- Standard insert batch size: ${INSERT_BATCH_SIZE}
- Large table batch size: ${LARGE_BATCH_SIZE}
- Large table insert batch size: ${LARGE_INSERT_BATCH_SIZE}
- Import product_current_prices: ${IMPORT_PRODUCT_CURRENT_PRICES}
- Import daily_inventory: ${IMPORT_DAILY_INVENTORY}
- Import product_stat_history: ${IMPORT_PRODUCT_STAT_HISTORY}`);
try {
// Get last sync time for incremental updates
const lastSyncTimes = {};
if (incrementalUpdate) {
try {
const syncResult = await localConnection.query(`
SELECT table_name, last_sync_timestamp
FROM sync_status
WHERE table_name IN (
'imported_product_current_prices',
'imported_daily_inventory',
'imported_product_stat_history'
)
`);
// Add check for rows existence and type
if (syncResult && Array.isArray(syncResult.rows)) {
for (const row of syncResult.rows) {
lastSyncTimes[row.table_name] = row.last_sync_timestamp;
console.log(`Last sync time for ${row.table_name}: ${row.last_sync_timestamp}`);
}
} else {
console.warn('Sync status query did not return expected rows. Proceeding without last sync times.');
}
} catch (error) {
console.error('Error fetching sync status:', error);
}
}
// Determine how many tables will be imported
const tablesCount = [
IMPORT_PRODUCT_CURRENT_PRICES,
IMPORT_DAILY_INVENTORY,
IMPORT_PRODUCT_STAT_HISTORY
].filter(Boolean).length;
// Run all imports sequentially for better reliability
console.log(`Starting sequential imports for ${tablesCount} tables...`);
outputProgress({
status: "running",
operation: "Historical data import",
message: `Starting sequential imports for ${tablesCount} tables...`,
current: 0,
total: tablesCount,
elapsed: formatElapsedTime(startTime)
});
let progressCount = 0;
let productCurrentPricesResult = { recordsAdded: 0, recordsUpdated: 0, totalProcessed: 0, errors: [] };
let dailyInventoryResult = { recordsAdded: 0, recordsUpdated: 0, totalProcessed: 0, errors: [] };
let productStatHistoryResult = { recordsAdded: 0, recordsUpdated: 0, totalProcessed: 0, errors: [] };
// Import product current prices
if (IMPORT_PRODUCT_CURRENT_PRICES) {
console.log('Importing product current prices...');
productCurrentPricesResult = await importProductCurrentPrices(
prodConnection,
localConnection,
oneYearAgoStr,
lastSyncTimes['imported_product_current_prices'],
BATCH_SIZE,
INSERT_BATCH_SIZE,
incrementalUpdate,
startTime
);
progressCount++;
outputProgress({
status: "running",
operation: "Historical data import",
message: `Completed import ${progressCount} of ${tablesCount}`,
current: progressCount,
total: tablesCount,
elapsed: formatElapsedTime(startTime)
});
}
// Import daily inventory
if (IMPORT_DAILY_INVENTORY) {
console.log('Importing daily inventory...');
dailyInventoryResult = await importDailyInventory(
prodConnection,
localConnection,
oneYearAgoStr,
lastSyncTimes['imported_daily_inventory'],
BATCH_SIZE,
INSERT_BATCH_SIZE,
incrementalUpdate,
startTime
);
progressCount++;
outputProgress({
status: "running",
operation: "Historical data import",
message: `Completed import ${progressCount} of ${tablesCount}`,
current: progressCount,
total: tablesCount,
elapsed: formatElapsedTime(startTime)
});
}
// Import product stat history - using optimized approach
if (IMPORT_PRODUCT_STAT_HISTORY) {
console.log('Importing product stat history...');
productStatHistoryResult = await importProductStatHistory(
prodConnection,
localConnection,
recentDateStr, // Use more recent date for this massive table
lastSyncTimes['imported_product_stat_history'],
LARGE_BATCH_SIZE,
LARGE_INSERT_BATCH_SIZE,
incrementalUpdate,
startTime,
USE_RECENT_MONTHS // Pass the recent months constant
);
progressCount++;
outputProgress({
status: "running",
operation: "Historical data import",
message: `Completed import ${progressCount} of ${tablesCount}`,
current: progressCount,
total: tablesCount,
elapsed: formatElapsedTime(startTime)
});
}
// Aggregate results
const totalRecordsAdded =
productCurrentPricesResult.recordsAdded +
dailyInventoryResult.recordsAdded +
productStatHistoryResult.recordsAdded;
const totalRecordsUpdated =
productCurrentPricesResult.recordsUpdated +
dailyInventoryResult.recordsUpdated +
productStatHistoryResult.recordsUpdated;
const totalProcessed =
productCurrentPricesResult.totalProcessed +
dailyInventoryResult.totalProcessed +
productStatHistoryResult.totalProcessed;
const allErrors = [
...productCurrentPricesResult.errors,
...dailyInventoryResult.errors,
...productStatHistoryResult.errors
];
// Log import summary
console.log(`
Historical data import complete:
-------------------------------
Records added: ${totalRecordsAdded}
Records updated: ${totalRecordsUpdated}
Total processed: ${totalProcessed}
Errors: ${allErrors.length}
Time taken: ${formatElapsedTime(startTime)}
`);
// Final progress update
outputProgress({
status: "complete",
operation: "Historical data import",
message: `Import complete. Added: ${totalRecordsAdded}, Updated: ${totalRecordsUpdated}, Errors: ${allErrors.length}`,
current: tablesCount,
total: tablesCount,
elapsed: formatElapsedTime(startTime)
});
// Log any errors
if (allErrors.length > 0) {
console.log('Errors encountered during import:');
console.log(JSON.stringify(allErrors, null, 2));
}
// Calculate duration
const endTime = Date.now();
const durationSeconds = Math.round((endTime - startTime) / 1000);
const finalStatus = allErrors.length === 0 ? 'complete' : 'failed';
const errorMessage = allErrors.length > 0 ? JSON.stringify(allErrors) : null;
// Update import history
await localConnection.query(`
INSERT INTO import_history (
table_name,
end_time,
duration_seconds,
records_added,
records_updated,
is_incremental,
status,
error_message,
additional_info
)
VALUES ($1, NOW(), $2, $3, $4, $5, $6, $7, $8)
`, [
'historical_data_combined',
durationSeconds,
totalRecordsAdded,
totalRecordsUpdated,
incrementalUpdate,
finalStatus,
errorMessage,
JSON.stringify({
totalProcessed,
tablesImported: {
imported_product_current_prices: IMPORT_PRODUCT_CURRENT_PRICES,
imported_daily_inventory: IMPORT_DAILY_INVENTORY,
imported_product_stat_history: IMPORT_PRODUCT_STAT_HISTORY
}
})
]);
// Return summary
return {
recordsAdded: totalRecordsAdded,
recordsUpdated: totalRecordsUpdated,
totalProcessed,
errors: allErrors,
timeTaken: formatElapsedTime(startTime)
};
} catch (error) {
console.error('Error importing historical data:', error);
// Final progress update on error
outputProgress({
status: "failed",
operation: "Historical data import",
message: `Import failed: ${error.message}`,
elapsed: formatElapsedTime(startTime)
});
throw error;
}
}
/**
* Imports product_current_prices data from MySQL to PostgreSQL
*/
async function importProductCurrentPrices(
prodConnection,
localConnection,
oneYearAgoStr,
lastSyncTime,
batchSize,
insertBatchSize,
incrementalUpdate,
startTime
) {
let recordsAdded = 0;
let recordsUpdated = 0;
let totalProcessed = 0;
let errors = [];
let offset = 0;
let allProcessed = false;
try {
// Get total count for progress reporting
const [countResult] = await prodConnection.query(`
SELECT COUNT(*) as total
FROM product_current_prices
WHERE (date_active >= ? OR date_deactive >= ?)
${incrementalUpdate && lastSyncTime ? `AND date_deactive > ?` : ''}
`, [oneYearAgoStr, oneYearAgoStr, ...(incrementalUpdate && lastSyncTime ? [lastSyncTime] : [])]);
const totalCount = countResult[0].total;
outputProgress({
status: "running",
operation: "Historical data import - Product Current Prices",
message: `Found ${totalCount} records to process`,
current: 0,
total: totalCount,
elapsed: formatElapsedTime(startTime)
});
// Process in batches for better performance
while (!allProcessed) {
try {
// Fetch batch from production
const [rows] = await prodConnection.query(`
SELECT
price_id,
pid,
qty_buy,
is_min_qty_buy,
price_each,
qty_limit,
no_promo,
checkout_offer,
active,
date_active,
date_deactive
FROM product_current_prices
WHERE (date_active >= ? OR date_deactive >= ?)
${incrementalUpdate && lastSyncTime ? `AND date_deactive > ?` : ''}
ORDER BY price_id
LIMIT ? OFFSET ?
`, [
oneYearAgoStr,
oneYearAgoStr,
...(incrementalUpdate && lastSyncTime ? [lastSyncTime] : []),
batchSize,
offset
]);
if (rows.length === 0) {
allProcessed = true;
break;
}
// Process rows in smaller batches for better performance
for (let i = 0; i < rows.length; i += insertBatchSize) {
const batch = rows.slice(i, i + insertBatchSize);
if (batch.length === 0) continue;
try {
// Build parameterized query to handle NULL values properly
const values = [];
const placeholders = [];
let placeholderIndex = 1;
for (const row of batch) {
const rowPlaceholders = [
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`
];
placeholders.push(`(${rowPlaceholders.join(', ')})`);
values.push(
row.price_id,
row.pid,
row.qty_buy,
row.is_min_qty_buy ? true : false,
row.price_each,
row.qty_limit, // PostgreSQL will handle null values properly
row.no_promo ? true : false,
row.checkout_offer ? true : false,
row.active ? true : false,
validateDate(row.date_active),
validateDate(row.date_deactive)
);
}
// Execute batch insert
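// Note: in the CTE below, (xmax = 0) is true for rows that were freshly inserted
// and false for rows rewritten by ON CONFLICT, which is how inserted_count and
// updated_count are separated.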
const result = await localConnection.query(`
WITH ins AS (
INSERT INTO imported_product_current_prices (
price_id, pid, qty_buy, is_min_qty_buy, price_each, qty_limit,
no_promo, checkout_offer, active, date_active, date_deactive
)
VALUES ${placeholders.join(',\n')}
ON CONFLICT (price_id) DO UPDATE SET
pid = EXCLUDED.pid,
qty_buy = EXCLUDED.qty_buy,
is_min_qty_buy = EXCLUDED.is_min_qty_buy,
price_each = EXCLUDED.price_each,
qty_limit = EXCLUDED.qty_limit,
no_promo = EXCLUDED.no_promo,
checkout_offer = EXCLUDED.checkout_offer,
active = EXCLUDED.active,
date_active = EXCLUDED.date_active,
date_deactive = EXCLUDED.date_deactive,
updated = CURRENT_TIMESTAMP
RETURNING (xmax = 0) AS inserted
)
SELECT
COUNT(*) FILTER (WHERE inserted) AS inserted_count,
COUNT(*) FILTER (WHERE NOT inserted) AS updated_count
FROM ins
`, values);
// Safely update counts based on the result
if (result && result.rows && result.rows.length > 0) {
const insertedCount = parseInt(result.rows[0].inserted_count || 0);
const updatedCount = parseInt(result.rows[0].updated_count || 0);
recordsAdded += insertedCount;
recordsUpdated += updatedCount;
}
} catch (error) {
console.error(`Error in batch import of product_current_prices at offset ${i}:`, error);
errors.push({
table: 'imported_product_current_prices',
batchOffset: i,
batchSize: batch.length,
error: error.message
});
}
}
totalProcessed += rows.length;
offset += rows.length;
// Update progress
outputProgress({
status: "running",
operation: "Historical data import - Product Current Prices",
message: `Processed ${totalProcessed} of ${totalCount} records`,
current: totalProcessed,
total: totalCount,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, totalProcessed, totalCount),
rate: calculateRate(startTime, totalProcessed)
});
} catch (error) {
console.error('Error in batch import of product_current_prices:', error);
errors.push({
table: 'imported_product_current_prices',
error: error.message,
offset: offset,
batchSize: batchSize
});
// Try to continue with next batch
offset += batchSize;
}
}
// Update sync status
await localConnection.query(`
INSERT INTO sync_status (table_name, last_sync_timestamp)
VALUES ('imported_product_current_prices', NOW())
ON CONFLICT (table_name) DO UPDATE SET
last_sync_timestamp = NOW()
`);
return { recordsAdded, recordsUpdated, totalProcessed, errors };
} catch (error) {
console.error('Error in product current prices import:', error);
return {
recordsAdded,
recordsUpdated,
totalProcessed,
errors: [...errors, {
table: 'imported_product_current_prices',
error: error.message
}]
};
}
}
/**
* Imports daily_inventory data from MySQL to PostgreSQL
*/
async function importDailyInventory(
prodConnection,
localConnection,
oneYearAgoStr,
lastSyncTime,
batchSize,
insertBatchSize,
incrementalUpdate,
startTime
) {
let recordsAdded = 0;
let recordsUpdated = 0;
let totalProcessed = 0;
let errors = [];
let offset = 0;
let allProcessed = false;
try {
// Get total count for progress reporting
const [countResult] = await prodConnection.query(`
SELECT COUNT(*) as total
FROM daily_inventory
WHERE date >= ?
${incrementalUpdate && lastSyncTime ? `AND stamp > ?` : ''}
`, [oneYearAgoStr, ...(incrementalUpdate && lastSyncTime ? [lastSyncTime] : [])]);
const totalCount = countResult[0].total;
outputProgress({
status: "running",
operation: "Historical data import - Daily Inventory",
message: `Found ${totalCount} records to process`,
current: 0,
total: totalCount,
elapsed: formatElapsedTime(startTime)
});
// Process in batches for better performance
while (!allProcessed) {
try {
// Fetch batch from production
const [rows] = await prodConnection.query(`
SELECT
date,
pid,
amountsold,
times_sold,
qtyreceived,
price,
costeach,
stamp
FROM daily_inventory
WHERE date >= ?
${incrementalUpdate && lastSyncTime ? `AND stamp > ?` : ''}
ORDER BY date, pid
LIMIT ? OFFSET ?
`, [
oneYearAgoStr,
...(incrementalUpdate && lastSyncTime ? [lastSyncTime] : []),
batchSize,
offset
]);
if (rows.length === 0) {
allProcessed = true;
break;
}
// Process rows in smaller batches for better performance
for (let i = 0; i < rows.length; i += insertBatchSize) {
const batch = rows.slice(i, i + insertBatchSize);
if (batch.length === 0) continue;
try {
// Build parameterized query to handle NULL values properly
const values = [];
const placeholders = [];
let placeholderIndex = 1;
for (const row of batch) {
const rowPlaceholders = [
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`
];
placeholders.push(`(${rowPlaceholders.join(', ')})`);
values.push(
validateDate(row.date),
row.pid,
row.amountsold || 0,
row.times_sold || 0,
row.qtyreceived || 0,
row.price || 0,
row.costeach || 0,
validateDate(row.stamp)
);
}
// Execute batch insert
const result = await localConnection.query(`
WITH ins AS (
INSERT INTO imported_daily_inventory (
date, pid, amountsold, times_sold, qtyreceived, price, costeach, stamp
)
VALUES ${placeholders.join(',\n')}
ON CONFLICT (date, pid) DO UPDATE SET
amountsold = EXCLUDED.amountsold,
times_sold = EXCLUDED.times_sold,
qtyreceived = EXCLUDED.qtyreceived,
price = EXCLUDED.price,
costeach = EXCLUDED.costeach,
stamp = EXCLUDED.stamp,
updated = CURRENT_TIMESTAMP
RETURNING (xmax = 0) AS inserted
)
SELECT
COUNT(*) FILTER (WHERE inserted) AS inserted_count,
COUNT(*) FILTER (WHERE NOT inserted) AS updated_count
FROM ins
`, values);
// Safely update counts based on the result
if (result && result.rows && result.rows.length > 0) {
const insertedCount = parseInt(result.rows[0].inserted_count || 0);
const updatedCount = parseInt(result.rows[0].updated_count || 0);
recordsAdded += insertedCount;
recordsUpdated += updatedCount;
}
} catch (error) {
console.error(`Error in batch import of daily_inventory at offset ${i}:`, error);
errors.push({
table: 'imported_daily_inventory',
batchOffset: i,
batchSize: batch.length,
error: error.message
});
}
}
totalProcessed += rows.length;
offset += rows.length;
// Update progress
outputProgress({
status: "running",
operation: "Historical data import - Daily Inventory",
message: `Processed ${totalProcessed} of ${totalCount} records`,
current: totalProcessed,
total: totalCount,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, totalProcessed, totalCount),
rate: calculateRate(startTime, totalProcessed)
});
} catch (error) {
console.error('Error in batch import of daily_inventory:', error);
errors.push({
table: 'imported_daily_inventory',
error: error.message,
offset: offset,
batchSize: batchSize
});
// Try to continue with next batch
offset += batchSize;
}
}
// Update sync status
await localConnection.query(`
INSERT INTO sync_status (table_name, last_sync_timestamp)
VALUES ('imported_daily_inventory', NOW())
ON CONFLICT (table_name) DO UPDATE SET
last_sync_timestamp = NOW()
`);
return { recordsAdded, recordsUpdated, totalProcessed, errors };
} catch (error) {
console.error('Error in daily inventory import:', error);
return {
recordsAdded,
recordsUpdated,
totalProcessed,
errors: [...errors, {
table: 'imported_daily_inventory',
error: error.message
}]
};
}
}
/**
* Imports product_stat_history data from MySQL to PostgreSQL
* Using fast direct inserts without conflict checking
*/
async function importProductStatHistory(
prodConnection,
localConnection,
recentDateStr, // Use more recent date instead of one year ago
lastSyncTime,
batchSize,
insertBatchSize,
incrementalUpdate,
startTime,
recentMonths // Add parameter for recent months
) {
let recordsAdded = 0;
let recordsUpdated = 0;
let totalProcessed = 0;
let errors = [];
let offset = 0;
let allProcessed = false;
let lastRateCheck = Date.now();
let lastProcessed = 0;
try {
// Get total count for progress reporting
const [countResult] = await prodConnection.query(`
SELECT COUNT(*) as total
FROM product_stat_history
WHERE date >= ?
${incrementalUpdate && lastSyncTime ? `AND date > ?` : ''}
`, [recentDateStr, ...(incrementalUpdate && lastSyncTime ? [lastSyncTime] : [])]);
const totalCount = countResult[0].total;
console.log(`Found ${totalCount} records to process in product_stat_history (using recent date: ${recentDateStr})`);
// Progress indicator
outputProgress({
status: "running",
operation: "Historical data import - Product Stat History",
message: `Found ${totalCount} records to process (last ${recentMonths} months only)`,
current: 0,
total: totalCount,
elapsed: formatElapsedTime(startTime)
});
// If not incremental, truncate the table first for better performance
if (!incrementalUpdate) {
console.log('Truncating imported_product_stat_history for full import...');
await localConnection.query('TRUNCATE TABLE imported_product_stat_history');
} else if (lastSyncTime) {
// For incremental updates, delete records that will be reimported
console.log(`Deleting records from imported_product_stat_history since ${lastSyncTime}...`);
await localConnection.query('DELETE FROM imported_product_stat_history WHERE date > $1', [lastSyncTime]);
}
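// Because a full import truncates the table and an incremental one deletes the overlapping
// date range above, the batch INSERTs below can skip ON CONFLICT handling entirely.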
// Process in batches for better performance
while (!allProcessed) {
try {
// Fetch batch from production with minimal filtering and no sorting
const [rows] = await prodConnection.query(`
SELECT
pid,
date,
COALESCE(score, 0) as score,
COALESCE(score2, 0) as score2,
COALESCE(qty_in_baskets, 0) as qty_in_baskets,
COALESCE(qty_sold, 0) as qty_sold,
COALESCE(notifies_set, 0) as notifies_set,
COALESCE(visibility_score, 0) as visibility_score,
COALESCE(health_score, 0) as health_score,
COALESCE(sold_view_score, 0) as sold_view_score
FROM product_stat_history
WHERE date >= ?
${incrementalUpdate && lastSyncTime ? `AND date > ?` : ''}
LIMIT ? OFFSET ?
`, [
recentDateStr,
...(incrementalUpdate && lastSyncTime ? [lastSyncTime] : []),
batchSize,
offset
]);
if (rows.length === 0) {
allProcessed = true;
break;
}
// Process rows in smaller batches for better performance
for (let i = 0; i < rows.length; i += insertBatchSize) {
const batch = rows.slice(i, i + insertBatchSize);
if (batch.length === 0) continue;
try {
// Build parameterized query to handle NULL values properly
const values = [];
const placeholders = [];
let placeholderIndex = 1;
for (const row of batch) {
const rowPlaceholders = [
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`,
`$${placeholderIndex++}`
];
placeholders.push(`(${rowPlaceholders.join(', ')})`);
values.push(
row.pid,
validateDate(row.date),
row.score,
row.score2,
row.qty_in_baskets,
row.qty_sold,
row.notifies_set,
row.visibility_score,
row.health_score,
row.sold_view_score
);
}
// Execute direct batch insert without conflict checking
await localConnection.query(`
INSERT INTO imported_product_stat_history (
pid, date, score, score2, qty_in_baskets, qty_sold, notifies_set,
visibility_score, health_score, sold_view_score
)
VALUES ${placeholders.join(',\n')}
`, values);
// All inserts are new records when using this approach
recordsAdded += batch.length;
} catch (error) {
console.error(`Error in batch insert of product_stat_history at offset ${i}:`, error);
errors.push({
table: 'imported_product_stat_history',
batchOffset: i,
batchSize: batch.length,
error: error.message
});
}
}
totalProcessed += rows.length;
offset += rows.length;
// Calculate current rate every 10 seconds or 100,000 records
const now = Date.now();
if (now - lastRateCheck > 10000 || totalProcessed - lastProcessed > 100000) {
const timeElapsed = (now - lastRateCheck) / 1000; // seconds
const recordsProcessed = totalProcessed - lastProcessed;
const currentRate = Math.round(recordsProcessed / timeElapsed);
console.log(`Current import rate: ${currentRate} records/second`);
lastRateCheck = now;
lastProcessed = totalProcessed;
}
// Update progress
outputProgress({
status: "running",
operation: "Historical data import - Product Stat History",
message: `Processed ${totalProcessed} of ${totalCount} records`,
current: totalProcessed,
total: totalCount,
elapsed: formatElapsedTime(startTime),
remaining: estimateRemaining(startTime, totalProcessed, totalCount),
rate: calculateRate(startTime, totalProcessed)
});
} catch (error) {
console.error('Error in batch import of product_stat_history:', error);
errors.push({
table: 'imported_product_stat_history',
error: error.message,
offset: offset,
batchSize: batchSize
});
// Try to continue with next batch
offset += batchSize;
}
}
// Update sync status
await localConnection.query(`
INSERT INTO sync_status (table_name, last_sync_timestamp)
VALUES ('imported_product_stat_history', NOW())
ON CONFLICT (table_name) DO UPDATE SET
last_sync_timestamp = NOW()
`);
return { recordsAdded, recordsUpdated, totalProcessed, errors };
} catch (error) {
console.error('Error in product stat history import:', error);
return {
recordsAdded,
recordsUpdated,
totalProcessed,
errors: [...errors, {
table: 'imported_product_stat_history',
error: error.message
}]
};
}
}
module.exports = importHistoricalData;

View File

@@ -1,677 +0,0 @@
const path = require('path');
const fs = require('fs');
const os = require('os'); // For detecting CPU cores
// Get the base directory (the directory containing the inventory-server folder)
const baseDir = path.resolve(__dirname, '../../..');
// Load environment variables from the inventory-server directory
require('dotenv').config({ path: path.resolve(__dirname, '../..', '.env') });
// Configure statement timeout (30 minutes)
const PG_STATEMENT_TIMEOUT_MS = 1800000;
// Add error handler for uncaught exceptions
process.on('uncaughtException', (error) => {
console.error('Uncaught Exception:', error);
process.exit(1);
});
// Add error handler for unhandled promise rejections
process.on('unhandledRejection', (reason, promise) => {
console.error('Unhandled Rejection at:', promise, 'reason:', reason);
process.exit(1);
});
// Load progress module
const progress = require('../utils/progress');
// Store progress functions in global scope to ensure availability
global.formatElapsedTime = progress.formatElapsedTime;
global.estimateRemaining = progress.estimateRemaining;
global.calculateRate = progress.calculateRate;
global.outputProgress = progress.outputProgress;
global.clearProgress = progress.clearProgress;
global.getProgress = progress.getProgress;
global.logError = progress.logError;
// Load database module
const { getConnection, closePool } = require('../utils/db');
// Add cancel handler
let isCancelled = false;
let runningQueryPromise = null;
function cancelCalculation() {
if (!isCancelled) {
isCancelled = true;
console.log('Calculation has been cancelled by user');
// Store the query promise to potentially cancel it
const queryToCancel = runningQueryPromise;
if (queryToCancel) {
console.log('Attempting to cancel the running query...');
}
// Cancel any of this application's queries that have been running for more than 5 seconds
try {
const connection = getConnection();
connection.then(async (conn) => {
try {
// Identify and cancel long-running queries from our application
// (pg_cancel_backend cancels the current query without terminating the backend)
await conn.query(`
SELECT pg_cancel_backend(pid)
FROM pg_stat_activity
WHERE query_start < now() - interval '5 seconds'
AND application_name = 'populate_metrics'
AND query NOT LIKE '%pg_cancel_backend%'
`);
// Release connection
conn.release();
} catch (err) {
console.error('Error during force cancellation:', err);
conn.release();
}
}).catch(err => {
console.error('Could not get connection for cancellation:', err);
});
} catch (err) {
console.error('Failed to terminate running queries:', err);
}
}
return {
success: true,
message: 'Calculation has been cancelled'
};
}
// Handle SIGTERM signal for cancellation
process.on('SIGTERM', cancelCalculation);
process.on('SIGINT', cancelCalculation);
const calculateInitialMetrics = (client, onProgress) => {
return client.query(`
-- Truncate the existing metrics tables to ensure clean data
TRUNCATE TABLE public.daily_product_snapshots;
TRUNCATE TABLE public.product_metrics;
-- First let's create daily snapshots for all products with order activity
WITH SalesData AS (
SELECT
p.pid,
p.sku,
o.date::date AS order_date,
-- Count orders to ensure we only include products with real activity
COUNT(o.id) as order_count,
-- Aggregate Sales (Quantity > 0, Status not Canceled/Returned)
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN o.quantity ELSE 0 END), 0) AS units_sold,
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN o.price * o.quantity ELSE 0 END), 0.00) AS gross_revenue_unadjusted,
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN o.discount ELSE 0 END), 0.00) AS discounts,
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN COALESCE(o.costeach, p.landing_cost_price, p.cost_price) * o.quantity ELSE 0 END), 0.00) AS cogs,
COALESCE(SUM(CASE WHEN o.quantity > 0 AND COALESCE(o.status, 'pending') NOT IN ('canceled', 'returned') THEN p.regular_price * o.quantity ELSE 0 END), 0.00) AS gross_regular_revenue,
-- Aggregate Returns (Quantity < 0 or Status = Returned)
COALESCE(SUM(CASE WHEN o.quantity < 0 OR COALESCE(o.status, 'pending') = 'returned' THEN ABS(o.quantity) ELSE 0 END), 0) AS units_returned,
COALESCE(SUM(CASE WHEN o.quantity < 0 OR COALESCE(o.status, 'pending') = 'returned' THEN o.price * ABS(o.quantity) ELSE 0 END), 0.00) AS returns_revenue
FROM public.products p
LEFT JOIN public.orders o ON p.pid = o.pid
GROUP BY p.pid, p.sku, o.date::date
HAVING COUNT(o.id) > 0 -- Only include products with actual orders
),
ReceivingData AS (
SELECT
r.pid,
r.received_date::date AS receiving_date,
-- Count receiving documents to ensure we only include products with real activity
COUNT(DISTINCT r.receiving_id) as receiving_count,
-- Calculate received quantity for this day
SUM(r.received_quantity) AS units_received,
-- Calculate received cost for this day
SUM(r.received_quantity * r.unit_cost) AS cost_received
FROM public.receivings r
GROUP BY r.pid, r.received_date::date
HAVING COUNT(DISTINCT r.receiving_id) > 0 OR SUM(r.received_quantity) > 0
),
-- Get current stock quantities
StockData AS (
SELECT
p.pid,
p.stock_quantity,
COALESCE(p.landing_cost_price, p.cost_price, 0.00) as effective_cost_price,
COALESCE(p.price, 0.00) as current_price,
COALESCE(p.regular_price, 0.00) as current_regular_price
FROM public.products p
),
-- Combine sales and receiving dates to get all activity dates
DatePidCombos AS (
SELECT DISTINCT pid, order_date AS activity_date FROM SalesData
UNION
SELECT DISTINCT pid, receiving_date FROM ReceivingData
),
-- Insert daily snapshots for all product-date combinations
SnapshotInsert AS (
INSERT INTO public.daily_product_snapshots (
snapshot_date,
pid,
sku,
eod_stock_quantity,
eod_stock_cost,
eod_stock_retail,
eod_stock_gross,
stockout_flag,
units_sold,
units_returned,
gross_revenue,
discounts,
returns_revenue,
net_revenue,
cogs,
gross_regular_revenue,
profit,
units_received,
cost_received,
calculation_timestamp
)
SELECT
d.activity_date AS snapshot_date,
d.pid,
p.sku,
-- Use current stock as approximation, since historical stock data is not available
s.stock_quantity AS eod_stock_quantity,
s.stock_quantity * s.effective_cost_price AS eod_stock_cost,
s.stock_quantity * s.current_price AS eod_stock_retail,
s.stock_quantity * s.current_regular_price AS eod_stock_gross,
(s.stock_quantity <= 0) AS stockout_flag,
-- Sales metrics
COALESCE(sd.units_sold, 0),
COALESCE(sd.units_returned, 0),
COALESCE(sd.gross_revenue_unadjusted, 0.00),
COALESCE(sd.discounts, 0.00),
COALESCE(sd.returns_revenue, 0.00),
COALESCE(sd.gross_revenue_unadjusted, 0.00) - COALESCE(sd.discounts, 0.00) AS net_revenue,
COALESCE(sd.cogs, 0.00),
COALESCE(sd.gross_regular_revenue, 0.00),
(COALESCE(sd.gross_revenue_unadjusted, 0.00) - COALESCE(sd.discounts, 0.00)) - COALESCE(sd.cogs, 0.00) AS profit,
-- Receiving metrics
COALESCE(rd.units_received, 0),
COALESCE(rd.cost_received, 0.00),
now() -- calculation timestamp
FROM DatePidCombos d
JOIN public.products p ON d.pid = p.pid
LEFT JOIN SalesData sd ON d.pid = sd.pid AND d.activity_date = sd.order_date
LEFT JOIN ReceivingData rd ON d.pid = rd.pid AND d.activity_date = rd.receiving_date
LEFT JOIN StockData s ON d.pid = s.pid
RETURNING pid, snapshot_date
),
-- Now build the aggregated product metrics from the daily snapshots
MetricsInsert AS (
INSERT INTO public.product_metrics (
pid,
sku,
current_stock_quantity,
current_stock_cost,
current_stock_retail,
current_stock_msrp,
is_out_of_stock,
total_units_sold,
total_units_returned,
return_rate,
gross_revenue,
total_discounts,
total_returns,
net_revenue,
total_cogs,
total_gross_revenue,
total_profit,
profit_margin,
avg_daily_units,
reorder_point,
reorder_alert,
days_of_supply,
sales_velocity,
sales_velocity_score,
rank_by_revenue,
rank_by_quantity,
rank_by_profit,
total_received_quantity,
total_received_cost,
last_sold_date,
last_received_date,
days_since_last_sale,
days_since_last_received,
calculation_timestamp
)
SELECT
p.pid,
p.sku,
p.stock_quantity AS current_stock_quantity,
p.stock_quantity * COALESCE(p.landing_cost_price, p.cost_price, 0) AS current_stock_cost,
p.stock_quantity * COALESCE(p.price, 0) AS current_stock_retail,
p.stock_quantity * COALESCE(p.regular_price, 0) AS current_stock_msrp,
(p.stock_quantity <= 0) AS is_out_of_stock,
-- Aggregate metrics
COALESCE(SUM(ds.units_sold), 0) AS total_units_sold,
COALESCE(SUM(ds.units_returned), 0) AS total_units_returned,
CASE
WHEN COALESCE(SUM(ds.units_sold), 0) > 0
THEN COALESCE(SUM(ds.units_returned), 0)::float / NULLIF(COALESCE(SUM(ds.units_sold), 0), 0)
ELSE 0
END AS return_rate,
COALESCE(SUM(ds.gross_revenue), 0) AS gross_revenue,
COALESCE(SUM(ds.discounts), 0) AS total_discounts,
COALESCE(SUM(ds.returns_revenue), 0) AS total_returns,
COALESCE(SUM(ds.net_revenue), 0) AS net_revenue,
COALESCE(SUM(ds.cogs), 0) AS total_cogs,
COALESCE(SUM(ds.gross_regular_revenue), 0) AS total_gross_revenue,
COALESCE(SUM(ds.profit), 0) AS total_profit,
CASE
WHEN COALESCE(SUM(ds.net_revenue), 0) > 0
THEN COALESCE(SUM(ds.profit), 0) / NULLIF(COALESCE(SUM(ds.net_revenue), 0), 0)
ELSE 0
END AS profit_margin,
-- Calculate average daily units
COALESCE(AVG(ds.units_sold), 0) AS avg_daily_units,
-- Calculate reorder point (simplified, can be enhanced with lead time and safety stock)
CEILING(COALESCE(AVG(ds.units_sold) * 14, 0)) AS reorder_point,
(p.stock_quantity <= CEILING(COALESCE(AVG(ds.units_sold) * 14, 0))) AS reorder_alert,
-- Days of supply based on average daily sales
CASE
WHEN COALESCE(AVG(ds.units_sold), 0) > 0
THEN p.stock_quantity / NULLIF(COALESCE(AVG(ds.units_sold), 0), 0)
ELSE NULL
END AS days_of_supply,
-- Sales velocity (average units sold per day over last 30 days)
(SELECT COALESCE(AVG(recent.units_sold), 0)
FROM public.daily_product_snapshots recent
WHERE recent.pid = p.pid
AND recent.snapshot_date >= CURRENT_DATE - INTERVAL '30 days'
) AS sales_velocity,
-- Placeholder for sales velocity score (can be calculated based on velocity)
0 AS sales_velocity_score,
-- Will be updated later by ranking procedure
0 AS rank_by_revenue,
0 AS rank_by_quantity,
0 AS rank_by_profit,
-- Receiving data
COALESCE(SUM(ds.units_received), 0) AS total_received_quantity,
COALESCE(SUM(ds.cost_received), 0) AS total_received_cost,
-- Date metrics
(SELECT MAX(sd.snapshot_date)
FROM public.daily_product_snapshots sd
WHERE sd.pid = p.pid AND sd.units_sold > 0
) AS last_sold_date,
(SELECT MAX(rd.snapshot_date)
FROM public.daily_product_snapshots rd
WHERE rd.pid = p.pid AND rd.units_received > 0
) AS last_received_date,
-- Calculate days since last sale/received
CASE
WHEN (SELECT MAX(sd.snapshot_date)
FROM public.daily_product_snapshots sd
WHERE sd.pid = p.pid AND sd.units_sold > 0) IS NOT NULL
THEN (CURRENT_DATE - (SELECT MAX(sd.snapshot_date)
FROM public.daily_product_snapshots sd
WHERE sd.pid = p.pid AND sd.units_sold > 0))::integer
ELSE NULL
END AS days_since_last_sale,
CASE
WHEN (SELECT MAX(rd.snapshot_date)
FROM public.daily_product_snapshots rd
WHERE rd.pid = p.pid AND rd.units_received > 0) IS NOT NULL
THEN (CURRENT_DATE - (SELECT MAX(rd.snapshot_date)
FROM public.daily_product_snapshots rd
WHERE rd.pid = p.pid AND rd.units_received > 0))::integer
ELSE NULL
END AS days_since_last_received,
now() -- calculation timestamp
FROM public.products p
LEFT JOIN public.daily_product_snapshots ds ON p.pid = ds.pid
GROUP BY p.pid, p.sku, p.stock_quantity, p.landing_cost_price, p.cost_price, p.price, p.regular_price
)
-- Update the calculate_status table
INSERT INTO public.calculate_status (module_name, last_calculation_timestamp)
VALUES
('daily_snapshots', now()),
('product_metrics', now())
ON CONFLICT (module_name) DO UPDATE
SET last_calculation_timestamp = now();
-- Finally, update the ranks for products
UPDATE public.product_metrics pm SET
rank_by_revenue = rev_ranks.rank
FROM (
SELECT pid, RANK() OVER (ORDER BY net_revenue DESC) AS rank
FROM public.product_metrics
WHERE net_revenue > 0
) rev_ranks
WHERE pm.pid = rev_ranks.pid;
UPDATE public.product_metrics pm SET
rank_by_quantity = qty_ranks.rank
FROM (
SELECT pid, RANK() OVER (ORDER BY total_units_sold DESC) AS rank
FROM public.product_metrics
WHERE total_units_sold > 0
) qty_ranks
WHERE pm.pid = qty_ranks.pid;
UPDATE public.product_metrics pm SET
rank_by_profit = profit_ranks.rank
FROM (
SELECT pid, RANK() OVER (ORDER BY total_profit DESC) AS rank
FROM public.product_metrics
WHERE total_profit > 0
) profit_ranks
WHERE pm.pid = profit_ranks.pid;
-- Return count of products with metrics
SELECT COUNT(*) AS product_count FROM public.product_metrics
`);
};
async function populateInitialMetrics() {
let connection;
const startTime = Date.now();
let calculateHistoryId;
try {
// Clean up any previously running calculations
connection = await getConnection({
// Add performance-related settings
application_name: 'populate_metrics',
statement_timeout: PG_STATEMENT_TIMEOUT_MS, // 30 min timeout per statement
});
// Ensure the calculate_status table exists and has the correct structure
await connection.query(`
CREATE TABLE IF NOT EXISTS calculate_status (
module_name TEXT PRIMARY KEY,
last_calculation_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP
)
`);
await connection.query(`
UPDATE calculate_history
SET
status = 'cancelled',
end_time = NOW(),
duration_seconds = EXTRACT(EPOCH FROM (NOW() - start_time))::INTEGER,
error_message = 'Previous calculation was not completed properly'
WHERE status = 'running' AND additional_info->>'type' = 'populate_initial_metrics'
`);
// Create history record for this calculation
const historyResult = await connection.query(`
INSERT INTO calculate_history (
start_time,
status,
additional_info
) VALUES (
NOW(),
'running',
jsonb_build_object(
'type', 'populate_initial_metrics',
'sql_file', 'populate_initial_product_metrics.sql'
)
) RETURNING id
`);
calculateHistoryId = historyResult.rows[0].id;
// Initialize progress
global.outputProgress({
status: 'running',
operation: 'Starting initial product metrics population',
current: 0,
total: 100,
elapsed: '0s',
remaining: 'Calculating... (this may take a while)',
rate: 0,
percentage: '0',
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
},
historyId: calculateHistoryId
});
// Prepare the database - analyze tables
global.outputProgress({
status: 'running',
operation: 'Analyzing database tables for better query performance',
current: 2,
total: 100,
elapsed: global.formatElapsedTime(startTime),
remaining: 'Analyzing...',
rate: 0,
percentage: '2',
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
},
historyId: calculateHistoryId
});
// Enable better query planning and parallel operations
await connection.query(`
-- Analyze tables for better query planning
ANALYZE public.products;
ANALYZE public.purchase_orders;
ANALYZE public.daily_product_snapshots;
ANALYZE public.orders;
-- Enable parallel operations
SET LOCAL enable_parallel_append = on;
SET LOCAL enable_parallel_hash = on;
SET LOCAL max_parallel_workers_per_gather = 4;
-- Larger work memory for complex sorts/joins
SET LOCAL work_mem = '128MB';
`).catch(err => {
// Non-fatal if analyze fails
console.warn('Failed to analyze tables (non-fatal):', err.message);
});
// Execute the SQL query
global.outputProgress({
status: 'running',
operation: 'Executing initial metrics SQL query',
current: 5,
total: 100,
elapsed: global.formatElapsedTime(startTime),
remaining: 'Calculating... (this could take several hours with 150M+ records)',
rate: 0,
percentage: '5',
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
},
historyId: calculateHistoryId
});
// Read the SQL file
const sqlFilePath = path.resolve(__dirname, 'populate_initial_product_metrics.sql');
console.log('Base directory:', baseDir);
console.log('Script directory:', __dirname);
console.log('SQL file path:', sqlFilePath);
console.log('Current working directory:', process.cwd());
if (!fs.existsSync(sqlFilePath)) {
throw new Error(`SQL file not found at ${sqlFilePath}`);
}
// Read and clean up the SQL (Slightly more robust cleaning)
const sqlQuery = fs.readFileSync(sqlFilePath, 'utf8')
.replace(/\r\n/g, '\n') // Handle Windows endings
.replace(/\r/g, '\n') // Handle old Mac endings
.trim(); // Remove leading/trailing whitespace VERY IMPORTANT
// Log details again AFTER cleaning
console.log('SQL Query length (cleaned):', sqlQuery.length);
console.log('SQL Query structure validation:');
console.log('- Contains DO block:', sqlQuery.includes('DO $$') || sqlQuery.includes('DO $')); // Check both types of tag start
console.log('- Contains BEGIN:', sqlQuery.includes('BEGIN'));
console.log('- Contains END:', sqlQuery.includes('END $$;') || sqlQuery.includes('END $')); // Check both types of tag end
console.log('- First 50 chars:', JSON.stringify(sqlQuery.slice(0, 50)));
console.log('- Last 100 chars (cleaned):', JSON.stringify(sqlQuery.slice(-100)));
// Final check to ensure clean SQL ending
if (!sqlQuery.endsWith('END $$;')) {
console.warn('WARNING: SQL does not end with "END $$;". This might cause issues.');
console.log('Exact ending:', JSON.stringify(sqlQuery.slice(-20)));
}
// Execute the script
console.log('Starting initial product metrics population...');
// Track the query promise for potential cancellation
runningQueryPromise = connection.query({
text: sqlQuery,
rowMode: 'array'
});
await runningQueryPromise;
runningQueryPromise = null;
// Update progress to 100%
global.outputProgress({
status: 'complete',
operation: 'Initial product metrics population complete',
current: 100,
total: 100,
elapsed: global.formatElapsedTime(startTime),
remaining: '0s',
rate: 0,
percentage: '100',
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: Math.round((Date.now() - startTime) / 1000)
},
historyId: calculateHistoryId
});
// Update history with completion
await connection.query(`
UPDATE calculate_history
SET
end_time = NOW(),
duration_seconds = $1,
status = 'completed'
WHERE id = $2
`, [Math.round((Date.now() - startTime) / 1000), calculateHistoryId]);
// Clear progress file on successful completion
global.clearProgress();
return {
success: true,
message: 'Initial product metrics population completed successfully',
duration: Math.round((Date.now() - startTime) / 1000)
};
} catch (error) {
const endTime = Date.now();
const totalElapsedSeconds = Math.round((endTime - startTime) / 1000);
// Enhanced error logging
console.error('Error details:', {
message: error.message,
code: error.code,
hint: error.hint,
position: error.position,
detail: error.detail,
where: error.where ? error.where.substring(0, 500) + '...' : undefined, // Truncate to avoid huge logs
severity: error.severity,
file: error.file,
line: error.line,
routine: error.routine
});
// Update history with error
if (connection && calculateHistoryId) {
await connection.query(`
UPDATE calculate_history
SET
end_time = NOW(),
duration_seconds = $1,
status = $2,
error_message = $3
WHERE id = $4
`, [
totalElapsedSeconds,
isCancelled ? 'cancelled' : 'failed',
error.message,
calculateHistoryId
]);
}
if (isCancelled) {
global.outputProgress({
status: 'cancelled',
operation: 'Calculation cancelled',
current: 50,
total: 100,
elapsed: global.formatElapsedTime(startTime),
remaining: null,
rate: 0,
percentage: '50',
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: totalElapsedSeconds
},
historyId: calculateHistoryId
});
} else {
global.outputProgress({
status: 'error',
operation: 'Error during initial product metrics population',
message: error.message,
current: 0,
total: 100,
elapsed: global.formatElapsedTime(startTime),
remaining: null,
rate: 0,
percentage: '0',
timing: {
start_time: new Date(startTime).toISOString(),
end_time: new Date().toISOString(),
elapsed_seconds: totalElapsedSeconds
},
historyId: calculateHistoryId
});
}
console.error('Error during initial product metrics population:', error);
return {
success: false,
error: error.message,
duration: totalElapsedSeconds
};
} finally {
if (connection) {
connection.release();
}
await closePool();
}
}
// Start population process
populateInitialMetrics()
.then(result => {
if (result.success) {
console.log(`Initial product metrics population completed successfully in ${result.duration} seconds`);
process.exit(0);
} else {
console.error(`Initial product metrics population failed: ${result.error}`);
process.exit(1);
}
})
.catch(err => {
console.error('Unexpected error:', err);
process.exit(1);
});

View File

@@ -95,7 +95,14 @@ BEGIN
profit_30d = EXCLUDED.profit_30d, cogs_30d = EXCLUDED.cogs_30d,
sales_365d = EXCLUDED.sales_365d, revenue_365d = EXCLUDED.revenue_365d,
lifetime_sales = EXCLUDED.lifetime_sales, lifetime_revenue = EXCLUDED.lifetime_revenue,
- avg_margin_30d = EXCLUDED.avg_margin_30d;
+ avg_margin_30d = EXCLUDED.avg_margin_30d
+ WHERE -- Only update if at least one value has changed
+ brand_metrics.product_count IS DISTINCT FROM EXCLUDED.product_count OR
+ brand_metrics.active_product_count IS DISTINCT FROM EXCLUDED.active_product_count OR
+ brand_metrics.current_stock_units IS DISTINCT FROM EXCLUDED.current_stock_units OR
+ brand_metrics.sales_30d IS DISTINCT FROM EXCLUDED.sales_30d OR
+ brand_metrics.revenue_30d IS DISTINCT FROM EXCLUDED.revenue_30d OR
+ brand_metrics.lifetime_sales IS DISTINCT FROM EXCLUDED.lifetime_sales;
-- Update calculate_status
INSERT INTO public.calculate_status (module_name, last_calculation_timestamp)
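The new change checks use IS DISTINCT FROM rather than <> because it is NULL-safe: with <>, any comparison involving NULL yields NULL, so a value moving between NULL and non-NULL would never register as a change. A standalone illustration (plain SQL, not part of the commit):

SELECT NULL <> 5               AS unsafe_compare,    -- NULL: a plain <> check would skip the update
       NULL IS DISTINCT FROM 5 AS null_safe_compare; -- true: NULL vs non-NULL counts as a change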

View File

@@ -238,7 +238,8 @@ BEGIN
category_type = EXCLUDED.category_type,
parent_id = EXCLUDED.parent_id,
last_calculated = EXCLUDED.last_calculated,
- -- Update rolled-up metrics
+ -- ROLLED-UP METRICS (includes this category + all descendants)
product_count = EXCLUDED.product_count,
active_product_count = EXCLUDED.active_product_count,
replenishable_product_count = EXCLUDED.replenishable_product_count,
@@ -250,7 +251,8 @@ BEGIN
profit_30d = EXCLUDED.profit_30d, cogs_30d = EXCLUDED.cogs_30d,
sales_365d = EXCLUDED.sales_365d, revenue_365d = EXCLUDED.revenue_365d,
lifetime_sales = EXCLUDED.lifetime_sales, lifetime_revenue = EXCLUDED.lifetime_revenue,
- -- Update direct metrics
+ -- DIRECT METRICS (only products directly in this category)
direct_product_count = EXCLUDED.direct_product_count,
direct_active_product_count = EXCLUDED.direct_active_product_count,
direct_replenishable_product_count = EXCLUDED.direct_replenishable_product_count,
@@ -262,9 +264,19 @@ BEGIN
direct_profit_30d = EXCLUDED.direct_profit_30d, direct_cogs_30d = EXCLUDED.direct_cogs_30d,
direct_sales_365d = EXCLUDED.direct_sales_365d, direct_revenue_365d = EXCLUDED.direct_revenue_365d,
direct_lifetime_sales = EXCLUDED.direct_lifetime_sales, direct_lifetime_revenue = EXCLUDED.direct_lifetime_revenue,
- -- Update KPIs
+ -- Calculated KPIs
avg_margin_30d = EXCLUDED.avg_margin_30d,
- stock_turn_30d = EXCLUDED.stock_turn_30d;
+ stock_turn_30d = EXCLUDED.stock_turn_30d
+ WHERE -- Only update if at least one value has changed
+ category_metrics.product_count IS DISTINCT FROM EXCLUDED.product_count OR
+ category_metrics.active_product_count IS DISTINCT FROM EXCLUDED.active_product_count OR
+ category_metrics.current_stock_units IS DISTINCT FROM EXCLUDED.current_stock_units OR
+ category_metrics.sales_30d IS DISTINCT FROM EXCLUDED.sales_30d OR
+ category_metrics.revenue_30d IS DISTINCT FROM EXCLUDED.revenue_30d OR
+ category_metrics.lifetime_sales IS DISTINCT FROM EXCLUDED.lifetime_sales OR
+ category_metrics.direct_product_count IS DISTINCT FROM EXCLUDED.direct_product_count OR
+ category_metrics.direct_sales_30d IS DISTINCT FROM EXCLUDED.direct_sales_30d;
-- Update calculate_status
INSERT INTO public.calculate_status (module_name, last_calculation_timestamp)

View File

@@ -124,7 +124,15 @@ BEGIN
profit_30d = EXCLUDED.profit_30d, cogs_30d = EXCLUDED.cogs_30d,
sales_365d = EXCLUDED.sales_365d, revenue_365d = EXCLUDED.revenue_365d,
lifetime_sales = EXCLUDED.lifetime_sales, lifetime_revenue = EXCLUDED.lifetime_revenue,
- avg_margin_30d = EXCLUDED.avg_margin_30d;
+ avg_margin_30d = EXCLUDED.avg_margin_30d
+ WHERE -- Only update if at least one value has changed
+ vendor_metrics.product_count IS DISTINCT FROM EXCLUDED.product_count OR
+ vendor_metrics.active_product_count IS DISTINCT FROM EXCLUDED.active_product_count OR
+ vendor_metrics.current_stock_units IS DISTINCT FROM EXCLUDED.current_stock_units OR
+ vendor_metrics.on_order_units IS DISTINCT FROM EXCLUDED.on_order_units OR
+ vendor_metrics.sales_30d IS DISTINCT FROM EXCLUDED.sales_30d OR
+ vendor_metrics.revenue_30d IS DISTINCT FROM EXCLUDED.revenue_30d OR
+ vendor_metrics.lifetime_sales IS DISTINCT FROM EXCLUDED.lifetime_sales;
-- Update calculate_status
INSERT INTO public.calculate_status (module_name, last_calculation_timestamp)

View File

@@ -735,6 +735,22 @@ BEGIN
overstocked_units = EXCLUDED.overstocked_units, overstocked_cost = EXCLUDED.overstocked_cost, overstocked_retail = EXCLUDED.overstocked_retail, is_old_stock = EXCLUDED.is_old_stock,
yesterday_sales = EXCLUDED.yesterday_sales,
status = EXCLUDED.status
+ WHERE -- Only update if at least one key metric has changed
+ product_metrics.current_stock IS DISTINCT FROM EXCLUDED.current_stock OR
+ product_metrics.current_price IS DISTINCT FROM EXCLUDED.current_price OR
+ product_metrics.current_cost_price IS DISTINCT FROM EXCLUDED.current_cost_price OR
+ product_metrics.on_order_qty IS DISTINCT FROM EXCLUDED.on_order_qty OR
+ product_metrics.sales_7d IS DISTINCT FROM EXCLUDED.sales_7d OR
+ product_metrics.sales_30d IS DISTINCT FROM EXCLUDED.sales_30d OR
+ product_metrics.revenue_30d IS DISTINCT FROM EXCLUDED.revenue_30d OR
+ product_metrics.status IS DISTINCT FROM EXCLUDED.status OR
+ product_metrics.replenishment_units IS DISTINCT FROM EXCLUDED.replenishment_units OR
+ product_metrics.stock_cover_in_days IS DISTINCT FROM EXCLUDED.stock_cover_in_days OR
+ product_metrics.yesterday_sales IS DISTINCT FROM EXCLUDED.yesterday_sales OR
+ -- Check a few other important fields that might change
+ product_metrics.date_last_sold IS DISTINCT FROM EXCLUDED.date_last_sold OR
+ product_metrics.earliest_expected_date IS DISTINCT FROM EXCLUDED.earliest_expected_date OR
+ product_metrics.lifetime_sales IS DISTINCT FROM EXCLUDED.lifetime_sales
+ ;
-- Update the status table with the timestamp from the START of this run

View File

@@ -1,428 +0,0 @@
#!/bin/bash
# Simple script to import CSV to PostgreSQL using psql
# Usage: ./psql-csv-import.sh <csv-file> <table-name> [start-batch]
# Exit on error
set -e
# Get arguments
CSV_FILE=$1
TABLE_NAME=$2
BATCH_SIZE=500000 # Process 500,000 rows at a time
START_BATCH=${3:-1} # Optional third parameter to start from a specific batch
if [ -z "$CSV_FILE" ] || [ -z "$TABLE_NAME" ]; then
echo "Usage: ./psql-csv-import.sh <csv-file> <table-name> [start-batch]"
exit 1
fi
# Check if file exists (only needed for batch 1)
if [ "$START_BATCH" -eq 1 ] && [ ! -f "$CSV_FILE" ]; then
echo "Error: CSV file '$CSV_FILE' not found"
exit 1
fi
# Load environment variables
if [ -f "../.env" ]; then
source "../.env"
else
echo "Warning: .env file not found, using default connection parameters"
fi
# Set default connection parameters if not from .env
DB_HOST=${DB_HOST:-localhost}
DB_PORT=${DB_PORT:-5432}
DB_NAME=${DB_NAME:-inventory_db}
DB_USER=${DB_USER:-postgres}
export PGPASSWORD=${DB_PASSWORD:-} # Export password for psql
# Common psql parameters
PSQL_OPTS="-h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME"
# Function to clean up database state
cleanup_and_optimize() {
echo "Cleaning up and optimizing database state..."
# Analyze the target table to update statistics
psql $PSQL_OPTS -c "ANALYZE $TABLE_NAME;"
# Perform vacuum to reclaim space and update stats
psql $PSQL_OPTS -c "VACUUM $TABLE_NAME;"
# Reset connection pool
psql $PSQL_OPTS -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = current_database() AND pid <> pg_backend_pid();"
# Reset session state (cached plans, temp tables, GUC settings)
psql $PSQL_OPTS -c "DISCARD ALL;"
echo "Optimization complete."
}
# Show connection info
echo "Importing $CSV_FILE into $TABLE_NAME"
echo "Database: $DB_NAME on $DB_HOST:$DB_PORT with batch size: $BATCH_SIZE starting at batch $START_BATCH"
# Start timer
START_TIME=$(date +%s)
# Create progress tracking file
PROGRESS_FILE="/tmp/import_progress_${TABLE_NAME}.txt"
touch "$PROGRESS_FILE"
echo "Starting import at $(date), batch $START_BATCH" >> "$PROGRESS_FILE"
# If we're resuming, run cleanup first
if [ "$START_BATCH" -gt 1 ]; then
cleanup_and_optimize
fi
# For imported_product_stat_history, use optimized approach with hardcoded column names
if [ "$TABLE_NAME" = "imported_product_stat_history" ]; then
echo "Using optimized import for $TABLE_NAME"
# Only drop constraints/indexes and create staging table for batch 1
if [ "$START_BATCH" -eq 1 ]; then
# Extract CSV header
CSV_HEADER=$(head -n 1 "$CSV_FILE")
echo "CSV header: $CSV_HEADER"
# Step 1: Drop constraints and indexes
echo "Dropping constraints and indexes..."
psql $PSQL_OPTS -c "
DO \$\$
DECLARE
constraint_name TEXT;
BEGIN
-- Drop primary key constraint if exists
SELECT conname INTO constraint_name
FROM pg_constraint
WHERE conrelid = '$TABLE_NAME'::regclass AND contype = 'p';
IF FOUND THEN
EXECUTE 'ALTER TABLE $TABLE_NAME DROP CONSTRAINT IF EXISTS ' || constraint_name;
RAISE NOTICE 'Dropped primary key constraint: %', constraint_name;
END IF;
END \$\$;
"
# Drop all indexes on the table
psql $PSQL_OPTS -c "
DO \$\$
DECLARE
index_name TEXT;
index_record RECORD;
BEGIN
FOR index_record IN
SELECT indexname
FROM pg_indexes
WHERE tablename = '$TABLE_NAME'
LOOP
EXECUTE 'DROP INDEX IF EXISTS ' || index_record.indexname;
RAISE NOTICE 'Dropped index: %', index_record.indexname;
END LOOP;
END \$\$;
"
# Step 2: Set maintenance_work_mem and disable triggers
echo "Setting maintenance_work_mem and disabling triggers..."
psql $PSQL_OPTS -c "
SET maintenance_work_mem = '1GB';
ALTER TABLE $TABLE_NAME DISABLE TRIGGER ALL;
"
# Step 3: Create staging table
echo "Creating staging table..."
psql $PSQL_OPTS -c "
DROP TABLE IF EXISTS staging_import;
CREATE UNLOGGED TABLE staging_import (
pid TEXT,
date TEXT,
score TEXT,
score2 TEXT,
qty_in_baskets TEXT,
qty_sold TEXT,
notifies_set TEXT,
visibility_score TEXT,
health_score TEXT,
sold_view_score TEXT
);
-- Create an index on staging_import to improve OFFSET performance
CREATE INDEX ON staging_import (pid);
"
# Step 4: Import CSV into staging table
echo "Importing CSV into staging table..."
psql $PSQL_OPTS -c "\copy staging_import FROM '$CSV_FILE' WITH CSV HEADER DELIMITER ','"
else
echo "Resuming import from batch $START_BATCH - skipping table creation and CSV import"
# Check if staging table exists
STAGING_EXISTS=$(psql $PSQL_OPTS -t -c "SELECT EXISTS(SELECT 1 FROM pg_tables WHERE tablename='staging_import');" | tr -d '[:space:]')
if [ "$STAGING_EXISTS" != "t" ]; then
echo "Error: Staging table 'staging_import' does not exist. Run without batch parameter first."
exit 1
fi
# Ensure triggers are disabled
psql $PSQL_OPTS -c "ALTER TABLE $TABLE_NAME DISABLE TRIGGER ALL;"
# Optimize PostgreSQL for better performance
psql $PSQL_OPTS -c "
-- Increase work mem for this session
SET work_mem = '256MB';
SET maintenance_work_mem = '1GB';
"
fi
# Step 5: Get total row count
TOTAL_ROWS=$(psql $PSQL_OPTS -t -c "SELECT COUNT(*) FROM staging_import;" | tr -d '[:space:]')
echo "Total rows to import: $TOTAL_ROWS"
# Calculate starting point
PROCESSED=$(( ($START_BATCH - 1) * $BATCH_SIZE ))
if [ $PROCESSED -ge $TOTAL_ROWS ]; then
echo "Error: Start batch $START_BATCH is beyond the available rows ($TOTAL_ROWS)"
exit 1
fi
# Step 6: Process in batches with shell loop
BATCH_NUM=$(( $START_BATCH - 1 ))
# We'll process batches in chunks of 10 before cleaning up
CHUNKS_SINCE_CLEANUP=0
while [ $PROCESSED -lt $TOTAL_ROWS ]; do
BATCH_NUM=$(( $BATCH_NUM + 1 ))
BATCH_START=$(date +%s)
MAX_ROWS=$(( $PROCESSED + $BATCH_SIZE ))
if [ $MAX_ROWS -gt $TOTAL_ROWS ]; then
MAX_ROWS=$TOTAL_ROWS
fi
echo "Processing batch $BATCH_NUM (rows $PROCESSED to $MAX_ROWS)..."
# Optimize query buffer for this batch
psql $PSQL_OPTS -c "SET work_mem = '256MB';"
# Insert batch with type casts
psql $PSQL_OPTS -c "
INSERT INTO $TABLE_NAME (
pid, date, score, score2, qty_in_baskets, qty_sold,
notifies_set, visibility_score, health_score, sold_view_score
)
SELECT
pid::bigint,
date::date,
score::numeric,
score2::numeric,
qty_in_baskets::smallint,
qty_sold::smallint,
notifies_set::smallint,
visibility_score::numeric,
health_score::varchar,
sold_view_score::numeric
FROM staging_import
ORDER BY pid, date -- stable order so LIMIT/OFFSET batches never overlap or skip rows
LIMIT $BATCH_SIZE
OFFSET $PROCESSED;
"
# Update progress
BATCH_END=$(date +%s)
BATCH_ELAPSED=$(( $BATCH_END - $BATCH_START ))
PROGRESS_PCT=$(echo "scale=2; $MAX_ROWS * 100 / $TOTAL_ROWS" | bc)
echo "Batch $BATCH_NUM committed in ${BATCH_ELAPSED}s, $MAX_ROWS of $TOTAL_ROWS rows processed ($PROGRESS_PCT%)" | tee -a "$PROGRESS_FILE"
# Increment counter
PROCESSED=$(( $PROCESSED + $BATCH_SIZE ))
CHUNKS_SINCE_CLEANUP=$(( $CHUNKS_SINCE_CLEANUP + 1 ))
# Check current row count every 10 batches
if [ $(( $BATCH_NUM % 10 )) -eq 0 ]; then
CURRENT_COUNT=$(psql $PSQL_OPTS -t -c "SELECT COUNT(*) FROM $TABLE_NAME;" | tr -d '[:space:]')
echo "Current row count in $TABLE_NAME: $CURRENT_COUNT" | tee -a "$PROGRESS_FILE"
# Every 10 batches, run an intermediate cleanup
if [ $CHUNKS_SINCE_CLEANUP -ge 10 ]; then
echo "Running intermediate cleanup and optimization..."
psql $PSQL_OPTS -c "VACUUM $TABLE_NAME;"
CHUNKS_SINCE_CLEANUP=0
fi
fi
# Optional - write a checkpoint file to know where to restart
echo "$BATCH_NUM" > "/tmp/import_last_batch_${TABLE_NAME}.txt"
done
# Only recreate indexes if we've completed the import
if [ $PROCESSED -ge $TOTAL_ROWS ]; then
# Step 7: Re-enable triggers and recreate primary key
echo "Re-enabling triggers and recreating primary key..."
psql $PSQL_OPTS -c "
ALTER TABLE $TABLE_NAME ENABLE TRIGGER ALL;
ALTER TABLE $TABLE_NAME ADD PRIMARY KEY (pid, date);
"
# Step 8: Clean up and get final count
echo "Cleaning up and getting final count..."
psql $PSQL_OPTS -c "
DROP TABLE staging_import;
VACUUM ANALYZE $TABLE_NAME;
SELECT COUNT(*) AS \"Total rows in $TABLE_NAME\" FROM $TABLE_NAME;
"
else
echo "Import interrupted at batch $BATCH_NUM. To resume, run:"
echo "./psql-csv-import.sh $CSV_FILE $TABLE_NAME $BATCH_NUM"
fi
else
# Generic approach for other tables
if [ "$START_BATCH" -eq 1 ]; then
# Extract CSV header
CSV_HEADER=$(head -n 1 "$CSV_FILE")
echo "CSV header: $CSV_HEADER"
# Extract CSV header and format it for SQL
CSV_COLUMNS=$(echo "$CSV_HEADER" | tr ',' '\n' | sed 's/^/"/;s/$/"/' | tr '\n' ',' | sed 's/,$//')
TEMP_COLUMNS=$(echo "$CSV_HEADER" | tr ',' '\n' | sed 's/$/ TEXT/' | tr '\n' ',' | sed 's/,$//')
echo "Importing columns: $CSV_COLUMNS"
# Step 1: Set maintenance_work_mem and disable triggers
echo "Setting maintenance_work_mem and disabling triggers..."
psql $PSQL_OPTS -c "
SET maintenance_work_mem = '1GB';
ALTER TABLE $TABLE_NAME DISABLE TRIGGER ALL;
"
# Step 2: Create temp table
echo "Creating temporary table..."
psql $PSQL_OPTS -c "
DROP TABLE IF EXISTS temp_import;
CREATE UNLOGGED TABLE temp_import ($TEMP_COLUMNS);
-- Expression index on the constant 1 (note: this does not actually index the first column)
CREATE INDEX ON temp_import ((1));
"
# Step 3: Import CSV into temp table
echo "Importing CSV into temporary table..."
psql $PSQL_OPTS -c "\copy temp_import FROM '$CSV_FILE' WITH CSV HEADER DELIMITER ','"
else
echo "Resuming import from batch $START_BATCH - skipping table creation and CSV import"
# Check if temp table exists
TEMP_EXISTS=$(psql $PSQL_OPTS -t -c "SELECT EXISTS(SELECT 1 FROM pg_tables WHERE tablename='temp_import');" | tr -d '[:space:]')
if [ "$TEMP_EXISTS" != "t" ]; then
echo "Error: Temporary table 'temp_import' does not exist. Run without batch parameter first."
exit 1
fi
# Ensure triggers are disabled
psql $PSQL_OPTS -c "ALTER TABLE $TABLE_NAME DISABLE TRIGGER ALL;"
# Optimize PostgreSQL for better performance
psql $PSQL_OPTS -c "
-- Increase work mem for this session
SET work_mem = '256MB';
SET maintenance_work_mem = '1GB';
"
# Hard-code columns since we know them
CSV_COLUMNS='"pid","date","score","score2","qty_in_baskets","qty_sold","notifies_set","visibility_score","health_score","sold_view_score"'
echo "Using standard columns: $CSV_COLUMNS"
fi
# Step 4: Get total row count
TOTAL_ROWS=$(psql $PSQL_OPTS -t -c "SELECT COUNT(*) FROM temp_import;" | tr -d '[:space:]')
echo "Total rows to import: $TOTAL_ROWS"
# Calculate starting point
PROCESSED=$(( ($START_BATCH - 1) * $BATCH_SIZE ))
if [ $PROCESSED -ge $TOTAL_ROWS ]; then
echo "Error: Start batch $START_BATCH is beyond the available rows ($TOTAL_ROWS)"
exit 1
fi
# Step 5: Process in batches with shell loop
BATCH_NUM=$(( $START_BATCH - 1 ))
# We'll process batches in chunks of 10 before cleaning up
CHUNKS_SINCE_CLEANUP=0
while [ $PROCESSED -lt $TOTAL_ROWS ]; do
BATCH_NUM=$(( $BATCH_NUM + 1 ))
BATCH_START=$(date +%s)
MAX_ROWS=$(( $PROCESSED + $BATCH_SIZE ))
if [ $MAX_ROWS -gt $TOTAL_ROWS ]; then
MAX_ROWS=$TOTAL_ROWS
fi
echo "Processing batch $BATCH_NUM (rows $PROCESSED to $MAX_ROWS)..."
# Optimize query buffer for this batch
psql $PSQL_OPTS -c "SET work_mem = '256MB';"
# Insert batch
psql $PSQL_OPTS -c "
INSERT INTO $TABLE_NAME ($CSV_COLUMNS)
SELECT $CSV_COLUMNS
FROM temp_import
LIMIT $BATCH_SIZE
OFFSET $PROCESSED;
"
# Update progress
BATCH_END=$(date +%s)
BATCH_ELAPSED=$(( $BATCH_END - $BATCH_START ))
PROGRESS_PCT=$(echo "scale=2; $MAX_ROWS * 100 / $TOTAL_ROWS" | bc)
echo "Batch $BATCH_NUM committed in ${BATCH_ELAPSED}s, $MAX_ROWS of $TOTAL_ROWS rows processed ($PROGRESS_PCT%)" | tee -a "$PROGRESS_FILE"
# Increment counter
PROCESSED=$(( $PROCESSED + $BATCH_SIZE ))
CHUNKS_SINCE_CLEANUP=$(( $CHUNKS_SINCE_CLEANUP + 1 ))
# Check current row count every 10 batches
if [ $(( $BATCH_NUM % 10 )) -eq 0 ]; then
CURRENT_COUNT=$(psql $PSQL_OPTS -t -c "SELECT COUNT(*) FROM $TABLE_NAME;" | tr -d '[:space:]')
echo "Current row count in $TABLE_NAME: $CURRENT_COUNT" | tee -a "$PROGRESS_FILE"
# Every 10 batches, run an intermediate cleanup
if [ $CHUNKS_SINCE_CLEANUP -ge 10 ]; then
echo "Running intermediate cleanup and optimization..."
psql $PSQL_OPTS -c "VACUUM $TABLE_NAME;"
CHUNKS_SINCE_CLEANUP=0
fi
fi
# Optional - write a checkpoint file to know where to restart
echo "$BATCH_NUM" > "/tmp/import_last_batch_${TABLE_NAME}.txt"
done
# Only clean up if we've completed the import
if [ $PROCESSED -ge $TOTAL_ROWS ]; then
# Step 6: Re-enable triggers and clean up
echo "Re-enabling triggers and cleaning up..."
psql $PSQL_OPTS -c "
ALTER TABLE $TABLE_NAME ENABLE TRIGGER ALL;
DROP TABLE temp_import;
VACUUM ANALYZE $TABLE_NAME;
SELECT COUNT(*) AS \"Total rows in $TABLE_NAME\" FROM $TABLE_NAME;
"
else
echo "Import interrupted at batch $BATCH_NUM. To resume, run:"
echo "./psql-csv-import.sh $CSV_FILE $TABLE_NAME $BATCH_NUM"
fi
fi
# Calculate elapsed time
END_TIME=$(date +%s)
ELAPSED=$((END_TIME - START_TIME))
echo "Import completed successfully in ${ELAPSED}s ($(($ELAPSED / 60)) minutes)"
echo "Progress log saved to $PROGRESS_FILE"