Fix category import

commit 1694562947
parent e1174b8e63
Date: 2025-01-25 14:11:38 -05:00
2 changed files with 176 additions and 241 deletions

@@ -68,11 +68,12 @@ CREATE TABLE categories (
     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
     status VARCHAR(20) DEFAULT 'active',
     UNIQUE KEY unique_name_type (name, type),
     UNIQUE KEY unique_category (id),
     FOREIGN KEY (parent_id) REFERENCES categories(id),
     INDEX idx_parent (parent_id),
     INDEX idx_type (type),
-    INDEX idx_status (status)
+    INDEX idx_status (status),
+    INDEX idx_name_type (name, type)
 ) ENGINE=InnoDB;
 -- Create vendor_details table
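
Note on the schema above: MySQL's ON DUPLICATE KEY UPDATE takes the update branch when an insert collides with *any* unique index, so with both unique_category (id) and unique_name_type (name, type) in place, a row carrying a new id but an already-used (name, type) pair updates the existing row instead of failing. A minimal mysql2/promise sketch of the upsert pattern this importer relies on (pool config and values are placeholders, not part of this commit):

const mysql = require('mysql2/promise');

async function upsertCategory(pool, cat) {
    // The UPDATE branch fires if EITHER unique key collides:
    // unique_category (id) or unique_name_type (name, type).
    await pool.query(
        `INSERT INTO categories (id, name, type, parent_id, description, status)
         VALUES (?, ?, ?, ?, ?, ?)
         ON DUPLICATE KEY UPDATE
             parent_id = VALUES(parent_id),
             description = VALUES(description),
             status = VALUES(status),
             updated_at = CURRENT_TIMESTAMP`,
        [cat.id, cat.name, cat.type, cat.parent_id, cat.description, 'active']
    );
}

// Usage sketch (config values are illustrative):
// const pool = mysql.createPool({ host: 'localhost', user: 'importer', database: 'shop' });
// await upsertCategory(pool, { id: 1, name: 'Fabric', type: 10, parent_id: null, description: 'Fabric' });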

@@ -109,77 +109,87 @@ async function importCategories(prodConnection, localConnection) {
     });
     const startTime = Date.now();
+    const typeOrder = [10, 20, 11, 21, 12, 13];
+    let totalInserted = 0;
+    let skippedCategories = [];
     try {
-        // First get all categories that we need
-        const [allRows] = await prodConnection.query(`
-            SELECT DISTINCT
+        // Process each type in order with its own query
+        for (const type of typeOrder) {
+            const [categories] = await prodConnection.query(`
+                SELECT
                     pc.cat_id as id,
                     pc.name,
                     pc.type,
-                    pc.master_cat_id as parent_id,
-                    pc.combined_name as description,
-                    'active' as status
+                    CASE
+                        WHEN pc.type IN (10, 20) THEN NULL -- Top level categories should have no parent
+                        WHEN pc.master_cat_id IS NULL THEN NULL
+                        ELSE pc.master_cat_id
+                    END as parent_id,
+                    pc.combined_name as description
                 FROM product_categories pc
-                INNER JOIN product_category_index pci ON pc.cat_id = pci.cat_id
-                INNER JOIN products p ON pci.pid = p.pid
-                WHERE pc.hidden = 0
-                AND p.date_created >= DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR)
-            `);
+                WHERE pc.type = ?
+                ORDER BY pc.cat_id
+            `, [type]);
-        // Separate into root and child categories
-        const rootCategories = allRows.filter(row => !row.parent_id || row.parent_id === 0);
-        const childCategories = allRows.filter(row => row.parent_id && row.parent_id > 0);
+            if (categories.length === 0) continue;
-        const total = allRows.length;
-        let current = 0;
+            console.log(`\nProcessing ${categories.length} type ${type} categories`);
-        // First insert root categories
-        if (rootCategories.length > 0) {
-            const placeholders = rootCategories.map(() =>
-                '(?, ?, ?, NULL, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)'
-            ).join(',');
+            // For types that can have parents (11, 21, 12, 13), verify parent existence
+            let categoriesToInsert = categories;
+            if (![10, 20].includes(type)) {
+                // Get all parent IDs
+                const parentIds = [...new Set(categories.map(c => c.parent_id).filter(id => id !== null))];
-            const values = rootCategories.flatMap(row => [
-                row.id,
-                row.name,
-                row.type,
-                row.description,
-                row.status
-            ]);
+                // Check which parents exist
+                const [existingParents] = await localConnection.query(
+                    'SELECT id FROM categories WHERE id IN (?)',
+                    [parentIds]
+                );
+                const existingParentIds = new Set(existingParents.map(p => p.id));
-            await localConnection.query(`
-                INSERT INTO categories (id, name, type, parent_id, description, status, created_at, updated_at)
-                VALUES ${placeholders}
-                ON DUPLICATE KEY UPDATE
-                name = VALUES(name),
-                type = VALUES(type),
-                parent_id = NULL,
-                description = VALUES(description),
-                status = VALUES(status),
-                updated_at = CURRENT_TIMESTAMP
-            `, values);
+                // Filter categories and track skipped ones
+                categoriesToInsert = categories.filter(cat =>
+                    cat.parent_id === null || existingParentIds.has(cat.parent_id)
+                );
+                const invalidCategories = categories.filter(cat =>
+                    cat.parent_id !== null && !existingParentIds.has(cat.parent_id)
+                );
-            current += rootCategories.length;
-            updateProgress(current, total, 'Categories import (root categories)', startTime);
+                if (invalidCategories.length > 0) {
+                    const skippedInfo = invalidCategories.map(c => ({
+                        id: c.id,
+                        name: c.name,
+                        type: c.type,
+                        missing_parent: c.parent_id
+                    }));
+                    skippedCategories.push(...skippedInfo);
+                    console.log('\nSkipping categories with missing parents:',
+                        invalidCategories.map(c => `${c.id} - ${c.name} (missing parent: ${c.parent_id})`).join('\n')
+                    );
+                }
-        }
-        // Then insert child categories in batches
-        const BATCH_SIZE = 100;
-        for (let i = 0; i < childCategories.length; i += BATCH_SIZE) {
-            const batch = childCategories.slice(i, i + BATCH_SIZE);
+                if (categoriesToInsert.length === 0) {
+                    console.log(`No valid categories of type ${type} to insert - all had missing parents`);
+                    continue;
+                }
+            }
-            const placeholders = batch.map(() =>
+            console.log(`Inserting ${categoriesToInsert.length} type ${type} categories`);
+            const placeholders = categoriesToInsert.map(() =>
                 '(?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)'
             ).join(',');
-            const values = batch.flatMap(row => [
-                row.id,
-                row.name,
-                row.type,
-                row.parent_id,
-                row.description,
-                row.status
+            const values = categoriesToInsert.flatMap(cat => [
+                cat.id,
+                cat.name,
+                cat.type,
+                cat.parent_id,
+                cat.description,
+                'active'
             ]);
             await localConnection.query(`
@@ -194,210 +204,147 @@ async function importCategories(prodConnection, localConnection) {
                 updated_at = CURRENT_TIMESTAMP
             `, values);
-            current += batch.length;
-            updateProgress(current, total, 'Categories import (child categories)', startTime);
+            totalInserted += categoriesToInsert.length;
+            updateProgress(totalInserted, totalInserted, 'Categories import', startTime);
         }
+        // After all imports, if we skipped any categories, throw an error
+        if (skippedCategories.length > 0) {
+            const error = new Error('Categories import completed with errors - some categories were skipped due to missing parents');
+            error.skippedCategories = skippedCategories;
+            throw error;
+        }
         outputProgress({
             status: 'complete',
             operation: 'Categories import completed',
-            current: total,
-            total,
+            current: totalInserted,
+            total: totalInserted,
             duration: formatDuration((Date.now() - startTime) / 1000)
         });
     } catch (error) {
         console.error('Error importing categories:', error);
+        if (error.skippedCategories) {
+            console.error('Skipped categories:', JSON.stringify(error.skippedCategories, null, 2));
+        }
         throw error;
     }
 }
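
Reviewer note: the fix above works because typeOrder lists the root types (10, 20) ahead of the child types (11, 21, 12, 13), so every parent row is committed before any row that references it. Detached from the type scheme, the same idea is a wave-by-wave topological insert; a hypothetical standalone sketch, assuming rows shaped like the query result above (not part of this commit):

// Hypothetical helper: stage rows so parents always land before the
// children that reference them, and surface rows with missing parents.
function stageByParent(categories) {
    const staged = [];        // waves of insertable rows, in insert order
    const placed = new Set(); // ids already staged
    let remaining = [...categories];
    while (remaining.length > 0) {
        // A row is ready once its parent is staged (or it has no parent).
        const ready = remaining.filter(c => c.parent_id == null || placed.has(c.parent_id));
        if (ready.length === 0) break; // leftovers reference missing parents
        ready.forEach(c => placed.add(c.id));
        staged.push(ready);
        remaining = remaining.filter(c => !placed.has(c.id));
    }
    return { staged, orphans: remaining }; // orphans mirror skippedCategories
}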
 async function importProducts(prodConnection, localConnection) {
     outputProgress({
-        operation: 'Starting products and categories import',
+        operation: 'Starting products import',
         status: 'running'
     });
     const startTime = Date.now();
     try {
-        // First get all products with their categories
+        // First get all products
         const [rows] = await prodConnection.query(`
-            WITH RECURSIVE category_hierarchy AS (
-                -- Get all categories and their full hierarchy
-                SELECT
-                    c.cat_id,
-                    c.name,
-                    c.type,
-                    c.master_cat_id,
-                    c.combined_name,
-                    1 as level
-                FROM product_categories c
-                WHERE c.master_cat_id = 0 OR c.master_cat_id IS NULL
-                UNION ALL
-                SELECT
-                    c.cat_id,
-                    c.name,
-                    c.type,
-                    c.master_cat_id,
-                    c.combined_name,
-                    h.level + 1
-                FROM product_categories c
-                INNER JOIN category_hierarchy h ON c.master_cat_id = h.cat_id
-            )
             SELECT
-                p.*,
-                GROUP_CONCAT(DISTINCT
-                    CONCAT_WS(':',
-                        ch.cat_id,
-                        ch.name,
-                        ch.type,
-                        ch.master_cat_id,
-                        ch.combined_name,
-                        ch.level
-                    )
-                    ORDER BY ch.level
-                ) as categories
+                p.pid as id,
+                p.description as title,
+                p.notes as description,
+                p.itemnumber as SKU,
+                p.date_created as created_at,
+                p.datein as first_received,
+                p.available_local as stock_quantity,
+                p.price_each as price,
+                p.sellingprice as regular_price,
+                p.cost_each as cost_price,
+                p.cost_landed as landing_cost_price,
+                p.upc as barcode,
+                p.harmonized_tariff_code,
+                p.stamp as updated_at,
+                CASE WHEN p.show + p.buyable > 0 THEN 1 ELSE 0 END as visible,
+                1 as managing_stock,
+                CASE WHEN p.reorder IN (127, 0) THEN 1 ELSE 0 END as replenishable,
+                p.supplier_name as vendor,
+                p.supplier_itemnumber as vendor_reference,
+                p.notions_itemnumber as notions_reference,
+                p.permalink,
+                p.image,
+                p.image_175,
+                p.image_full,
+                p.brand,
+                p.line,
+                p.subline,
+                p.artist,
+                p.options,
+                p.tags,
+                GROUP_CONCAT(DISTINCT pc.cat_id) as categories
             FROM products p
             LEFT JOIN product_category_index pci ON p.pid = pci.pid
-            LEFT JOIN category_hierarchy ch ON pci.cat_id = ch.cat_id
+            LEFT JOIN product_categories pc ON pci.cat_id = pc.cat_id
             WHERE p.date_created >= DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR)
+            AND pc.hidden = 0
             GROUP BY p.pid
         `);
         let current = 0;
         const total = rows.length;
-        // Track categories we need to insert
-        const categories = new Map();
-        // First pass: collect all categories
-        rows.forEach(row => {
-            if (row.categories) {
-                row.categories.split(',').forEach(catStr => {
-                    const [id, name, type, parentId, description, level] = catStr.split(':');
-                    categories.set(id, {
-                        id: parseInt(id),
-                        name,
-                        type,
-                        parent_id: parentId === '0' ? null : parseInt(parentId),
-                        description,
-                        level: parseInt(level),
-                        status: 'active'
-                    });
-                });
-            }
-        });
-        // Sort categories by level to ensure parents are inserted first
-        const sortedCategories = Array.from(categories.values())
-            .sort((a, b) => a.level - b.level);
-        // Insert categories level by level
-        const levels = [...new Set(sortedCategories.map(c => c.level))];
-        outputProgress({
-            status: 'running',
-            operation: 'Importing categories by level',
-            current: 0,
-            total: sortedCategories.length
-        });
-        let insertedCategories = 0;
-        for (const level of levels) {
-            const levelCategories = sortedCategories.filter(c => c.level === level);
-            if (levelCategories.length > 0) {
-                const placeholders = levelCategories.map(() =>
-                    '(?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)'
-                ).join(',');
-                const values = levelCategories.flatMap(cat => [
-                    cat.id,
-                    cat.name,
-                    cat.type,
-                    cat.parent_id,
-                    cat.description,
-                    cat.status
-                ]);
-                await localConnection.query(`
-                    INSERT INTO categories (id, name, type, parent_id, description, status, created_at, updated_at)
-                    VALUES ${placeholders}
-                    ON DUPLICATE KEY UPDATE
-                    name = VALUES(name),
-                    type = VALUES(type),
-                    parent_id = VALUES(parent_id),
-                    description = VALUES(description),
-                    status = VALUES(status),
-                    updated_at = CURRENT_TIMESTAMP
-                `, values);
-                insertedCategories += levelCategories.length;
-                updateProgress(insertedCategories, sortedCategories.length, 'Categories import', startTime);
-            }
-        }
-        // Now import products in batches
+        // Process products in batches
         const BATCH_SIZE = 100;
         for (let i = 0; i < rows.length; i += BATCH_SIZE) {
             const batch = rows.slice(i, i + BATCH_SIZE);
             // Create placeholders for batch insert
             const placeholders = batch.map(() =>
-                '(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)'
+                '(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'
             ).join(',');
             // Flatten values for batch insert
             const values = batch.flatMap(row => [
-                row.pid,
+                row.id,
                 row.title,
-                row.description || null,
-                row.itemnumber,
-                row.date_created,
+                row.description,
+                row.SKU,
+                row.created_at,
                 row.first_received,
                 row.stock_quantity || 0,
                 row.price || 0,
-                row.price_reg || 0,
-                row.cost_each || null,
-                row.cost_landed || null,
-                row.barcode || null,
-                row.harmonized_tariff_code || null,
-                row.visible === 1,
-                row.managing_stock === 1,
-                row.replenishable === 1,
-                row.supplier_name || null,
-                row.supplier_reference || null,
-                row.notions_reference || null,
-                row.permalink || null,
-                row.image || null,
-                row.image_175 || null,
-                row.image_full || null,
-                row.brand || null,
-                row.line || null,
-                row.subline || null,
-                row.artist || null,
-                row.options || null,
-                row.tags || null,
-                row.moq || 1,
-                row.uom || 1,
-                row.rating || null,
-                row.reviews || null,
-                row.weight || null,
-                row.length || null,
-                row.width || null,
-                row.height || null,
-                row.country_of_origin || null,
-                row.location || null,
-                row.total_sold || 0,
-                row.baskets || 0,
-                row.notifies || 0,
-                row.date_last_sold || null
+                row.regular_price || 0,
+                row.cost_price,
+                row.landing_cost_price,
+                row.barcode,
+                row.harmonized_tariff_code,
+                row.updated_at,
+                row.visible,
+                row.managing_stock,
+                row.replenishable,
+                row.vendor,
+                row.vendor_reference,
+                row.notions_reference,
+                row.permalink,
+                row.image,
+                row.image_175,
+                row.image_full,
+                row.brand,
+                row.line,
+                row.subline,
+                row.artist,
+                row.options,
+                row.tags
             ]);
             await localConnection.query(`
-                INSERT INTO products VALUES ${placeholders}
+                INSERT INTO products (
+                    id, title, description, SKU, created_at, first_received,
+                    stock_quantity, price, regular_price, cost_price, landing_cost_price,
+                    barcode, harmonized_tariff_code, updated_at, visible, managing_stock,
+                    replenishable, vendor, vendor_reference, notions_reference, permalink,
+                    image, image_175, image_full, brand, line, subline, artist, options, tags
+                )
+                VALUES ${placeholders}
                 ON DUPLICATE KEY UPDATE
                 title = VALUES(title),
                 description = VALUES(description),
                 SKU = VALUES(SKU),
                 created_at = VALUES(created_at),
                 first_received = VALUES(first_received),
                 stock_quantity = VALUES(stock_quantity),
                 price = VALUES(price),
                 regular_price = VALUES(regular_price),
@@ -421,21 +368,7 @@ async function importProducts(prodConnection, localConnection) {
                 subline = VALUES(subline),
                 artist = VALUES(artist),
                 options = VALUES(options),
-                tags = VALUES(tags),
-                moq = VALUES(moq),
-                uom = VALUES(uom),
-                rating = VALUES(rating),
-                reviews = VALUES(reviews),
-                weight = VALUES(weight),
-                length = VALUES(length),
-                width = VALUES(width),
-                height = VALUES(height),
-                country_of_origin = VALUES(country_of_origin),
-                location = VALUES(location),
-                total_sold = VALUES(total_sold),
-                baskets = VALUES(baskets),
-                notifies = VALUES(notifies),
-                date_last_sold = VALUES(date_last_sold)
+                tags = VALUES(tags)
             `, values);
             current += batch.length;
@@ -444,13 +377,13 @@
         outputProgress({
             status: 'complete',
-            operation: 'Products and categories import completed',
+            operation: 'Products import completed',
             current: total,
             total,
             duration: formatDuration((Date.now() - startTime) / 1000)
         });
     } catch (error) {
-        console.error('Error importing products and categories:', error);
+        console.error('Error importing products:', error);
         throw error;
     }
 }
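
One more note on the batch insert above: the placeholder tuple, the column list, and the per-row value array must stay in lockstep, which is easy to break when columns are added or dropped. A hypothetical helper, not in this commit, that derives all three from a single column list so the counts cannot drift:

// Hypothetical helper: build a multi-row INSERT from one column list.
// A placeholder/column count mismatch is a classic batch-insert bug;
// deriving everything from `columns` rules it out.
function buildBatchInsert(table, columns, rows) {
    const tuple = `(${columns.map(() => '?').join(', ')})`;
    const sql = `INSERT INTO ${table} (${columns.join(', ')}) VALUES ` +
        rows.map(() => tuple).join(',');
    const values = rows.flatMap(row => columns.map(col => row[col] ?? null));
    return { sql, values };
}

// Usage sketch:
// const { sql, values } = buildBatchInsert('products', ['id', 'title', 'SKU'], batch);
// await localConnection.query(sql, values);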
@@ -758,7 +691,7 @@ async function main() {
         message: 'Setting up connections...'
     });
-    // Set up SSH tunnel and production database connection
+    // Set up connections
     const tunnel = await setupSshTunnel();
     ssh = tunnel.ssh;
@@ -767,18 +700,19 @@
         stream: tunnel.stream
     });
-    // Set up local database connection
     localConnection = await mysql.createPool(localDbConfig);
-    // Check for cancellation after connections
-    if (isImportCancelled) {
-        throw new Error('Import cancelled');
-    }
+    if (isImportCancelled) throw new Error('Import cancelled');
-    // Import products (and categories)
+    // First import all categories
+    await importCategories(prodConnection, localConnection);
+    if (isImportCancelled) throw new Error('Import cancelled');
+    // Then import products
     await importProducts(prodConnection, localConnection);
     if (isImportCancelled) throw new Error('Import cancelled');
+    // Then import product-category relationships
     await importProductCategories(prodConnection, localConnection);
     if (isImportCancelled) throw new Error('Import cancelled');
@@ -800,7 +734,7 @@
             operation: 'Import process',
             error: error.message
         });
-        throw error; // Re-throw to be handled by caller
+        throw error;
     } finally {
         if (prodConnection) await prodConnection.end();
         if (localConnection) await localConnection.end();
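
The main() flow now repeats the same check-then-run pattern for each import step. A hypothetical refactor sketch (names illustrative, not part of this commit) that centralizes the cancellation check between steps:

// Hypothetical helper: run import steps in order, re-checking the cancel
// flag between steps, as main() does inline above.
async function runSteps(steps, isCancelled) {
    for (const [name, step] of steps) {
        if (isCancelled()) throw new Error('Import cancelled');
        console.log(`Running step: ${name}`);
        await step();
    }
}

// Usage sketch:
// await runSteps([
//     ['categories', () => importCategories(prodConnection, localConnection)],
//     ['products', () => importProducts(prodConnection, localConnection)],
//     ['product-categories', () => importProductCategories(prodConnection, localConnection)],
// ], () => isImportCancelled);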