Fix category import

commit 1694562947
parent e1174b8e63
Date: 2025-01-25 14:11:38 -05:00
2 changed files with 176 additions and 241 deletions

File 1 of 2: categories table schema (filename not shown)

@@ -68,11 +68,12 @@ CREATE TABLE categories (
   created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
   updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
   status VARCHAR(20) DEFAULT 'active',
-  UNIQUE KEY unique_name_type (name, type),
+  UNIQUE KEY unique_category (id),
   FOREIGN KEY (parent_id) REFERENCES categories(id),
   INDEX idx_parent (parent_id),
   INDEX idx_type (type),
-  INDEX idx_status (status)
+  INDEX idx_status (status),
+  INDEX idx_name_type (name, type)
 ) ENGINE=InnoDB;
 -- Create vendor_details table

File 2 of 2: import script (filename not shown)

@@ -109,77 +109,87 @@ async function importCategories(prodConnection, localConnection) {
   });
   const startTime = Date.now();
+  const typeOrder = [10, 20, 11, 21, 12, 13];
+  let totalInserted = 0;
+  let skippedCategories = [];
   try {
-    // First get all categories that we need
-    const [allRows] = await prodConnection.query(`
-      SELECT DISTINCT
-        pc.cat_id as id,
-        pc.name,
-        pc.type,
-        pc.master_cat_id as parent_id,
-        pc.combined_name as description,
-        'active' as status
-      FROM product_categories pc
-      INNER JOIN product_category_index pci ON pc.cat_id = pci.cat_id
-      INNER JOIN products p ON pci.pid = p.pid
-      WHERE pc.hidden = 0
-        AND p.date_created >= DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR)
-    `);
-    // Separate into root and child categories
-    const rootCategories = allRows.filter(row => !row.parent_id || row.parent_id === 0);
-    const childCategories = allRows.filter(row => row.parent_id && row.parent_id > 0);
-    const total = allRows.length;
-    let current = 0;
-    // First insert root categories
-    if (rootCategories.length > 0) {
-      const placeholders = rootCategories.map(() =>
-        '(?, ?, ?, NULL, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)'
-      ).join(',');
-      const values = rootCategories.flatMap(row => [
-        row.id,
-        row.name,
-        row.type,
-        row.description,
-        row.status
-      ]);
-      await localConnection.query(`
-        INSERT INTO categories (id, name, type, parent_id, description, status, created_at, updated_at)
-        VALUES ${placeholders}
-        ON DUPLICATE KEY UPDATE
-          name = VALUES(name),
-          type = VALUES(type),
-          parent_id = NULL,
-          description = VALUES(description),
-          status = VALUES(status),
-          updated_at = CURRENT_TIMESTAMP
-      `, values);
-      current += rootCategories.length;
-      updateProgress(current, total, 'Categories import (root categories)', startTime);
-    }
-    // Then insert child categories in batches
-    const BATCH_SIZE = 100;
-    for (let i = 0; i < childCategories.length; i += BATCH_SIZE) {
-      const batch = childCategories.slice(i, i + BATCH_SIZE);
-      const placeholders = batch.map(() =>
+    // Process each type in order with its own query
+    for (const type of typeOrder) {
+      const [categories] = await prodConnection.query(`
+        SELECT
+          pc.cat_id as id,
+          pc.name,
+          pc.type,
+          CASE
+            WHEN pc.type IN (10, 20) THEN NULL -- Top level categories should have no parent
+            WHEN pc.master_cat_id IS NULL THEN NULL
+            ELSE pc.master_cat_id
+          END as parent_id,
+          pc.combined_name as description
+        FROM product_categories pc
+        WHERE pc.type = ?
+        ORDER BY pc.cat_id
+      `, [type]);
+      if (categories.length === 0) continue;
+      console.log(`\nProcessing ${categories.length} type ${type} categories`);
+      // For types that can have parents (11, 21, 12, 13), verify parent existence
+      let categoriesToInsert = categories;
+      if (![10, 20].includes(type)) {
+        // Get all parent IDs
+        const parentIds = [...new Set(categories.map(c => c.parent_id).filter(id => id !== null))];
+        // Check which parents exist
+        const [existingParents] = await localConnection.query(
+          'SELECT id FROM categories WHERE id IN (?)',
+          [parentIds]
+        );
+        const existingParentIds = new Set(existingParents.map(p => p.id));
+        // Filter categories and track skipped ones
+        categoriesToInsert = categories.filter(cat =>
+          cat.parent_id === null || existingParentIds.has(cat.parent_id)
+        );
+        const invalidCategories = categories.filter(cat =>
+          cat.parent_id !== null && !existingParentIds.has(cat.parent_id)
+        );
+        if (invalidCategories.length > 0) {
+          const skippedInfo = invalidCategories.map(c => ({
+            id: c.id,
+            name: c.name,
+            type: c.type,
+            missing_parent: c.parent_id
+          }));
+          skippedCategories.push(...skippedInfo);
+          console.log('\nSkipping categories with missing parents:',
+            invalidCategories.map(c => `${c.id} - ${c.name} (missing parent: ${c.parent_id})`).join('\n')
+          );
+        }
+        if (categoriesToInsert.length === 0) {
+          console.log(`No valid categories of type ${type} to insert - all had missing parents`);
+          continue;
+        }
+      }
+      console.log(`Inserting ${categoriesToInsert.length} type ${type} categories`);
+      const placeholders = categoriesToInsert.map(() =>
         '(?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)'
       ).join(',');
-      const values = batch.flatMap(row => [
-        row.id,
-        row.name,
-        row.type,
-        row.parent_id,
-        row.description,
-        row.status
+      const values = categoriesToInsert.flatMap(cat => [
+        cat.id,
+        cat.name,
+        cat.type,
+        cat.parent_id,
+        cat.description,
+        'active'
       ]);
       await localConnection.query(`
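
The rewrite leans on typeOrder so parent types (10, 20) are inserted before child types (11, 21, 12, 13), keeping the FOREIGN KEY on parent_id satisfiable. A condensed sketch of that ordering invariant, not part of the diff; the helper name is hypothetical and the type codes are taken from the commit:

// Sketch: parent types rank first, child types after, so inserts in this
// order never reference a parent_id that has not been inserted yet.
const typeOrder = [10, 20, 11, 21, 12, 13];

function sortForInsert(categories) {
  const rank = new Map(typeOrder.map((t, i) => [t, i]));
  // Unknown types sort last so they can never precede their parents.
  return [...categories].sort(
    (a, b) => (rank.get(a.type) ?? typeOrder.length) - (rank.get(b.type) ?? typeOrder.length)
  );
}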
@@ -194,210 +204,147 @@ async function importCategories(prodConnection, localConnection) {
           updated_at = CURRENT_TIMESTAMP
       `, values);
-      current += batch.length;
-      updateProgress(current, total, 'Categories import (child categories)', startTime);
+      totalInserted += categoriesToInsert.length;
+      updateProgress(totalInserted, totalInserted, 'Categories import', startTime);
     }
+    // After all imports, if we skipped any categories, throw an error
+    if (skippedCategories.length > 0) {
+      const error = new Error('Categories import completed with errors - some categories were skipped due to missing parents');
+      error.skippedCategories = skippedCategories;
+      throw error;
+    }
     outputProgress({
       status: 'complete',
       operation: 'Categories import completed',
-      current: total,
-      total,
+      current: totalInserted,
+      total: totalInserted,
       duration: formatDuration((Date.now() - startTime) / 1000)
     });
   } catch (error) {
     console.error('Error importing categories:', error);
+    if (error.skippedCategories) {
+      console.error('Skipped categories:', JSON.stringify(error.skippedCategories, null, 2));
+    }
     throw error;
   }
 }

 async function importProducts(prodConnection, localConnection) {
   outputProgress({
-    operation: 'Starting products and categories import',
+    operation: 'Starting products import',
     status: 'running'
   });
   const startTime = Date.now();
   try {
-    // First get all products with their categories
+    // First get all products
     const [rows] = await prodConnection.query(`
-      WITH RECURSIVE category_hierarchy AS (
-        -- Get all categories and their full hierarchy
-        SELECT
-          c.cat_id,
-          c.name,
-          c.type,
-          c.master_cat_id,
-          c.combined_name,
-          1 as level
-        FROM product_categories c
-        WHERE c.master_cat_id = 0 OR c.master_cat_id IS NULL
-        UNION ALL
-        SELECT
-          c.cat_id,
-          c.name,
-          c.type,
-          c.master_cat_id,
-          c.combined_name,
-          h.level + 1
-        FROM product_categories c
-        INNER JOIN category_hierarchy h ON c.master_cat_id = h.cat_id
-      )
       SELECT
-        p.*,
-        GROUP_CONCAT(DISTINCT
-          CONCAT_WS(':',
-            ch.cat_id,
-            ch.name,
-            ch.type,
-            ch.master_cat_id,
-            ch.combined_name,
-            ch.level
-          )
-          ORDER BY ch.level
-        ) as categories
+        p.pid as id,
+        p.description as title,
+        p.notes as description,
+        p.itemnumber as SKU,
+        p.date_created as created_at,
+        p.datein as first_received,
+        p.available_local as stock_quantity,
+        p.price_each as price,
+        p.sellingprice as regular_price,
+        p.cost_each as cost_price,
+        p.cost_landed as landing_cost_price,
+        p.upc as barcode,
+        p.harmonized_tariff_code,
+        p.stamp as updated_at,
+        CASE WHEN p.show + p.buyable > 0 THEN 1 ELSE 0 END as visible,
+        1 as managing_stock,
+        CASE WHEN p.reorder IN (127, 0) THEN 1 ELSE 0 END as replenishable,
+        p.supplier_name as vendor,
+        p.supplier_itemnumber as vendor_reference,
+        p.notions_itemnumber as notions_reference,
+        p.permalink,
+        p.image,
+        p.image_175,
+        p.image_full,
+        p.brand,
+        p.line,
+        p.subline,
+        p.artist,
+        p.options,
+        p.tags,
+        GROUP_CONCAT(DISTINCT pc.cat_id) as categories
       FROM products p
       LEFT JOIN product_category_index pci ON p.pid = pci.pid
-      LEFT JOIN category_hierarchy ch ON pci.cat_id = ch.cat_id
+      LEFT JOIN product_categories pc ON pci.cat_id = pc.cat_id
       WHERE p.date_created >= DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR)
+        AND pc.hidden = 0
       GROUP BY p.pid
     `);
     let current = 0;
     const total = rows.length;
-    // Track categories we need to insert
-    const categories = new Map();
-    // First pass: collect all categories
-    rows.forEach(row => {
-      if (row.categories) {
-        row.categories.split(',').forEach(catStr => {
-          const [id, name, type, parentId, description, level] = catStr.split(':');
-          categories.set(id, {
-            id: parseInt(id),
-            name,
-            type,
-            parent_id: parentId === '0' ? null : parseInt(parentId),
-            description,
-            level: parseInt(level),
-            status: 'active'
-          });
-        });
-      }
-    });
-    // Sort categories by level to ensure parents are inserted first
-    const sortedCategories = Array.from(categories.values())
-      .sort((a, b) => a.level - b.level);
-    // Insert categories level by level
-    const levels = [...new Set(sortedCategories.map(c => c.level))];
-    outputProgress({
-      status: 'running',
-      operation: 'Importing categories by level',
-      current: 0,
-      total: sortedCategories.length
-    });
-    let insertedCategories = 0;
-    for (const level of levels) {
-      const levelCategories = sortedCategories.filter(c => c.level === level);
-      if (levelCategories.length > 0) {
-        const placeholders = levelCategories.map(() =>
-          '(?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)'
-        ).join(',');
-        const values = levelCategories.flatMap(cat => [
-          cat.id,
-          cat.name,
-          cat.type,
-          cat.parent_id,
-          cat.description,
-          cat.status
-        ]);
-        await localConnection.query(`
-          INSERT INTO categories (id, name, type, parent_id, description, status, created_at, updated_at)
-          VALUES ${placeholders}
-          ON DUPLICATE KEY UPDATE
-            name = VALUES(name),
-            type = VALUES(type),
-            parent_id = VALUES(parent_id),
-            description = VALUES(description),
-            status = VALUES(status),
-            updated_at = CURRENT_TIMESTAMP
-        `, values);
-        insertedCategories += levelCategories.length;
-        updateProgress(insertedCategories, sortedCategories.length, 'Categories import', startTime);
-      }
-    }
-    // Now import products in batches
+    // Process products in batches
     const BATCH_SIZE = 100;
     for (let i = 0; i < rows.length; i += BATCH_SIZE) {
       const batch = rows.slice(i, i + BATCH_SIZE);
-      // Create placeholders for batch insert
       const placeholders = batch.map(() =>
-        '(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)'
+        '(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'
       ).join(',');
-      // Flatten values for batch insert
       const values = batch.flatMap(row => [
-        row.pid,
+        row.id,
         row.title,
-        row.description || null,
-        row.itemnumber,
-        row.date_created,
+        row.description,
+        row.SKU,
+        row.created_at,
+        row.first_received,
         row.stock_quantity || 0,
         row.price || 0,
-        row.price_reg || 0,
-        row.cost_each || null,
-        row.cost_landed || null,
-        row.barcode || null,
-        row.harmonized_tariff_code || null,
-        row.visible === 1,
-        row.managing_stock === 1,
-        row.replenishable === 1,
-        row.supplier_name || null,
-        row.supplier_reference || null,
-        row.notions_reference || null,
-        row.permalink || null,
-        row.image || null,
-        row.image_175 || null,
-        row.image_full || null,
-        row.brand || null,
-        row.line || null,
-        row.subline || null,
-        row.artist || null,
-        row.options || null,
-        row.tags || null,
-        row.moq || 1,
-        row.uom || 1,
-        row.rating || null,
-        row.reviews || null,
-        row.weight || null,
-        row.length || null,
-        row.width || null,
-        row.height || null,
-        row.country_of_origin || null,
-        row.location || null,
-        row.total_sold || 0,
-        row.baskets || 0,
-        row.notifies || 0,
-        row.date_last_sold || null
+        row.regular_price || 0,
+        row.cost_price,
+        row.landing_cost_price,
+        row.barcode,
+        row.harmonized_tariff_code,
+        row.updated_at,
+        row.visible,
+        row.managing_stock,
+        row.replenishable,
+        row.vendor,
+        row.vendor_reference,
+        row.notions_reference,
+        row.permalink,
+        row.image,
+        row.image_175,
+        row.image_full,
+        row.brand,
+        row.line,
+        row.subline,
+        row.artist,
+        row.options,
+        row.tags
       ]);
       await localConnection.query(`
-        INSERT INTO products VALUES ${placeholders}
+        INSERT INTO products (
+          id, title, description, SKU, created_at, first_received,
+          stock_quantity, price, regular_price, cost_price, landing_cost_price,
+          barcode, harmonized_tariff_code, updated_at, visible, managing_stock,
+          replenishable, vendor, vendor_reference, notions_reference, permalink,
+          image, image_175, image_full, brand, line, subline, artist, options, tags
+        )
+        VALUES ${placeholders}
         ON DUPLICATE KEY UPDATE
           title = VALUES(title),
           description = VALUES(description),
+          SKU = VALUES(SKU),
+          created_at = VALUES(created_at),
+          first_received = VALUES(first_received),
           stock_quantity = VALUES(stock_quantity),
           price = VALUES(price),
           regular_price = VALUES(regular_price),
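
The product insert above builds one placeholder tuple per row and binds a single flattened values array. The same pattern in miniature, not part of the diff; the table is reduced to two hypothetical columns to keep the sketch short:

// Miniature sketch of the batched INSERT ... ON DUPLICATE KEY UPDATE
// pattern used above. `conn` is any mysql2 connection or pool.
async function upsertBatch(conn, rows) {
  if (rows.length === 0) return;
  const placeholders = rows.map(() => '(?, ?)').join(',');
  const values = rows.flatMap(r => [r.id, r.title]);
  await conn.query(
    `INSERT INTO products (id, title)
     VALUES ${placeholders}
     ON DUPLICATE KEY UPDATE title = VALUES(title)`,
    values
  );
}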
@@ -421,21 +368,7 @@ async function importProducts(prodConnection, localConnection) {
           subline = VALUES(subline),
           artist = VALUES(artist),
           options = VALUES(options),
-          tags = VALUES(tags),
-          moq = VALUES(moq),
-          uom = VALUES(uom),
-          rating = VALUES(rating),
-          reviews = VALUES(reviews),
-          weight = VALUES(weight),
-          length = VALUES(length),
-          width = VALUES(width),
-          height = VALUES(height),
-          country_of_origin = VALUES(country_of_origin),
-          location = VALUES(location),
-          total_sold = VALUES(total_sold),
-          baskets = VALUES(baskets),
-          notifies = VALUES(notifies),
-          date_last_sold = VALUES(date_last_sold)
+          tags = VALUES(tags)
       `, values);

       current += batch.length;
@@ -444,13 +377,13 @@ async function importProducts(prodConnection, localConnection) {
     outputProgress({
       status: 'complete',
-      operation: 'Products and categories import completed',
+      operation: 'Products import completed',
       current: total,
       total,
       duration: formatDuration((Date.now() - startTime) / 1000)
     });
   } catch (error) {
-    console.error('Error importing products and categories:', error);
+    console.error('Error importing products:', error);
     throw error;
   }
 }
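
updateProgress, outputProgress, and formatDuration are helpers defined elsewhere in the script. From the call sites in this diff, updateProgress plausibly has a shape like the following; this is an inference, not the script's actual implementation:

// Plausible shape of updateProgress, inferred from its call sites
// (current, total, operation label, start timestamp).
function updateProgress(current, total, operation, startTime) {
  outputProgress({
    status: 'running',
    operation,
    current,
    total,
    duration: formatDuration((Date.now() - startTime) / 1000)
  });
}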
@@ -758,7 +691,7 @@ async function main() {
       message: 'Setting up connections...'
     });

-    // Set up SSH tunnel and production database connection
+    // Set up connections
     const tunnel = await setupSshTunnel();
     ssh = tunnel.ssh;
@@ -767,18 +700,19 @@ async function main() {
       stream: tunnel.stream
     });

-    // Set up local database connection
     localConnection = await mysql.createPool(localDbConfig);

-    // Check for cancellation after connections
-    if (isImportCancelled) {
-      throw new Error('Import cancelled');
-    }
+    if (isImportCancelled) throw new Error('Import cancelled');

-    // Import products (and categories)
+    // First import all categories
+    await importCategories(prodConnection, localConnection);
+    if (isImportCancelled) throw new Error('Import cancelled');

+    // Then import products
     await importProducts(prodConnection, localConnection);
     if (isImportCancelled) throw new Error('Import cancelled');

+    // Then import product-category relationships
     await importProductCategories(prodConnection, localConnection);
     if (isImportCancelled) throw new Error('Import cancelled');
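
main() now sequences categories, then products, then product-category links, re-checking the cancellation flag between steps. The same pipeline expressed as a loop, a sketch assuming the three import functions and the shared isImportCancelled flag from the script:

// Sketch: the dependency-ordered import pipeline as a loop, checking
// the shared isImportCancelled flag before each step.
async function runImports(prodConnection, localConnection) {
  const steps = [importCategories, importProducts, importProductCategories];
  for (const step of steps) {
    if (isImportCancelled) throw new Error('Import cancelled');
    await step(prodConnection, localConnection);
  }
}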
@@ -800,7 +734,7 @@ async function main() {
       operation: 'Import process',
       error: error.message
     });
-    throw error; // Re-throw to be handled by caller
+    throw error;
   } finally {
     if (prodConnection) await prodConnection.end();
     if (localConnection) await localConnection.end();