Enhance AI validation with progress tracking and prompt debugging

This commit is contained in:
2025-02-22 20:53:13 -05:00
parent 694014934c
commit 959a64aebc
4 changed files with 660 additions and 67 deletions

View File

@@ -38,51 +38,119 @@ router.get('/debug', async (req, res) => {
console.log('Debug endpoint called');
const pool = req.app.locals.pool;
// Load taxonomy data first
console.log('Loading taxonomy data...');
const taxonomy = await getTaxonomyData(pool);
console.log('Taxonomy data loaded:', {
categoriesCount: taxonomy.categories.length,
themesCount: taxonomy.themes.length,
colorsCount: taxonomy.colors.length,
taxCodesCount: taxonomy.taxCodes.length,
sizeCategoriesCount: taxonomy.sizeCategories.length
});
// Then load the prompt
console.log('Loading prompt...');
const currentPrompt = await loadPrompt(pool);
const sampleData = [{ name: "Sample Product" }];
const fullPrompt = currentPrompt + '\n' + JSON.stringify(sampleData);
const response = {
cacheStatus: {
isCacheValid: isCacheValid(),
lastUpdated: cache.lastUpdated ? new Date(cache.lastUpdated).toISOString() : null,
timeUntilExpiry: cache.lastUpdated ?
Math.max(0, CACHE_TTL - (Date.now() - cache.lastUpdated)) / 1000 + ' seconds' :
'expired',
},
taxonomyStats: taxonomy ? {
categories: countItems(taxonomy.categories),
themes: taxonomy.themes.length,
colors: taxonomy.colors.length,
taxCodes: taxonomy.taxCodes.length,
sizeCategories: taxonomy.sizeCategories.length
} : null,
basePrompt: currentPrompt,
sampleFullPrompt: fullPrompt,
promptLength: fullPrompt.length,
};
console.log('Sending response with stats:', response.taxonomyStats);
res.json(response);
// Get a real supplier, company, and artist ID from the database
const [suppliers] = await pool.query('SELECT supplierid FROM suppliers LIMIT 1');
const [companies] = await pool.query('SELECT cat_id FROM product_categories WHERE type = 1 LIMIT 1');
const [artists] = await pool.query('SELECT cat_id FROM product_categories WHERE type = 40 LIMIT 1');
// Create a sample product with real IDs
const productsToUse = [{
supplierid: suppliers[0]?.supplierid || 1234,
company: companies[0]?.cat_id || 567,
artist: artists[0]?.cat_id || 890
}];
return await generateDebugResponse(pool, productsToUse, res);
} catch (error) {
console.error('Debug endpoint error:', error);
res.status(500).json({ error: error.message });
}
});
// New POST endpoint for debug with products
/**
 * POST /debug
 *
 * Builds the debug/prompt-preview response for the products supplied in the
 * request body (`{ products: [...] }`).
 *
 * Responses:
 *  - 400 when `products` is missing, not an array, empty, or contains an
 *    entry that is not a plain object.
 *  - 500 with `{ error: message }` when anything downstream throws.
 *  - Otherwise whatever generateDebugResponse sends.
 */
router.post('/debug', async (req, res) => {
  try {
    console.log('Debug POST endpoint called');
    const pool = req.app.locals.pool;
    // A request without a parsed body must produce a 400 below, not a
    // destructuring TypeError that surfaces as a 500.
    const { products } = req.body ?? {};

    console.log('Received products:', {
      isArray: Array.isArray(products),
      length: products?.length,
      firstProduct: products?.[0],
      lastProduct: products?.[products?.length - 1]
    });

    if (!Array.isArray(products)) {
      console.error('Invalid input: products is not an array');
      return res.status(400).json({ error: 'Products must be an array' });
    }

    if (products.length === 0) {
      console.error('Invalid input: products array is empty');
      return res.status(400).json({ error: 'Products array cannot be empty' });
    }

    // Destructuring `null` throws, and primitives/arrays would be "cleaned"
    // into meaningless objects, so reject them up front as a client error.
    const hasInvalidEntry = products.some(
      (product) => product === null || typeof product !== 'object' || Array.isArray(product)
    );
    if (hasInvalidEntry) {
      console.error('Invalid input: products contains a non-object entry');
      return res.status(400).json({ error: 'Each product must be an object' });
    }

    // Clean the products array to remove any internal fields
    const cleanedProducts = products.map((product) => {
      const { __errors, __index, ...cleanProduct } = product;
      return cleanProduct;
    });

    return await generateDebugResponse(pool, cleanedProducts, res);
  } catch (error) {
    console.error('Debug POST endpoint error:', error);
    res.status(500).json({ error: error.message });
  }
});
// Helper function to generate debug response
/**
 * Builds the prompt-debugging payload for a set of products and sends it
 * with `res.json`.
 *
 * The payload contains the cache status, taxonomy counts, the prompt returned
 * by loadPrompt, and that prompt with the JSON-serialised products appended.
 *
 * @param {object} pool - Database handle forwarded to getTaxonomyData and loadPrompt.
 * @param {Array<object>|null} productsToUse - Products used to build the prompt;
 *   when falsy, `taxonomyStats.filtered` is null.
 * @param {object} res - Response object; only `res.json` is called.
 * @returns {Promise<*>} The value returned by `res.json`.
 */
async function generateDebugResponse(pool, productsToUse, res) {
  // Load taxonomy data first
  console.log('Loading taxonomy data...');
  const taxonomy = await getTaxonomyData(pool);
  console.log('Taxonomy data loaded:', {
    categoriesCount: taxonomy.categories.length,
    themesCount: taxonomy.themes.length,
    colorsCount: taxonomy.colors.length,
    taxCodesCount: taxonomy.taxCodes.length,
    sizeCategoriesCount: taxonomy.sizeCategories.length,
    suppliersCount: taxonomy.suppliers.length,
    companiesCount: taxonomy.companies.length,
    artistsCount: taxonomy.artists.length
  });

  // Load the prompt using the same function used by validation
  console.log('Loading prompt...');
  const prompt = await loadPrompt(pool, productsToUse);
  const fullPrompt = prompt + '\n' + JSON.stringify(productsToUse);

  // Collect the numeric ids found under any of `fields` across all products.
  // Null/undefined values are skipped, mirroring how loadPrompt gathers ids.
  const collectIds = (fields) => {
    const ids = new Set();
    for (const product of productsToUse) {
      for (const field of fields) {
        const value = product[field];
        if (value !== undefined && value !== null) {
          ids.add(Number(value));
        }
      }
    }
    return ids;
  };

  // Count taxonomy entries ([id, name] pairs) whose id is referenced by the
  // products. The Set is built once instead of rescanning every product for
  // every taxonomy entry.
  const countUsed = (entries, fields) => {
    const ids = collectIds(fields);
    return entries.filter(([id]) => ids.has(Number(id))).length;
  };

  const hasTimestamp = Boolean(cache.lastUpdated);
  const response = {
    cacheStatus: {
      isCacheValid: isCacheValid(),
      lastUpdated: hasTimestamp ? new Date(cache.lastUpdated).toISOString() : null,
      timeUntilExpiry: hasTimestamp
        ? `${Math.max(0, CACHE_TTL - (Date.now() - cache.lastUpdated)) / 1000} seconds`
        : 'expired',
    },
    // taxonomy was already dereferenced above, so it is known to be non-null here.
    taxonomyStats: {
      categories: countItems(taxonomy.categories),
      themes: taxonomy.themes.length,
      colors: taxonomy.colors.length,
      taxCodes: taxonomy.taxCodes.length,
      sizeCategories: taxonomy.sizeCategories.length,
      suppliers: taxonomy.suppliers.length,
      companies: taxonomy.companies.length,
      artists: taxonomy.artists.length,
      // Add filtered counts when products are provided. Suppliers may be
      // referenced as either `supplierid` or `supplier`, as loadPrompt accepts both.
      filtered: productsToUse ? {
        suppliers: countUsed(taxonomy.suppliers, ['supplierid', 'supplier']),
        companies: countUsed(taxonomy.companies, ['company']),
        artists: countUsed(taxonomy.artists, ['artist'])
      } : null
    },
    basePrompt: prompt,
    sampleFullPrompt: fullPrompt,
    promptLength: fullPrompt.length,
  };

  console.log('Sending response with stats:', response.taxonomyStats);
  return res.json(response);
}
// Helper function to count total items in hierarchical structure
function countItems(items) {
return items.reduce((count, item) => {
@@ -167,6 +235,46 @@ async function getTaxonomyData(pool) {
// Fetch size categories
const [sizeCategories] = await pool.query('SELECT cat_id, name FROM product_categories WHERE type=50 ORDER BY name');
// Fetch suppliers
const [suppliers] = await pool.query(`
SELECT supplierid, companyname as name
FROM suppliers
WHERE companyname <> ''
ORDER BY companyname
`);
// Fetch companies (type 1)
const [companies] = await pool.query(`
SELECT cat_id, name
FROM product_categories
WHERE type = 1
ORDER BY name
`);
// Fetch artists (type 40)
const [artists] = await pool.query(`
SELECT cat_id, name
FROM product_categories
WHERE type = 40
ORDER BY name
`);
// Fetch lines (type 2)
const [lines] = await pool.query(`
SELECT cat_id, name
FROM product_categories
WHERE type = 2
ORDER BY name
`);
// Fetch sub-lines (type 3)
const [subLines] = await pool.query(`
SELECT cat_id, name
FROM product_categories
WHERE type = 3
ORDER BY name
`);
// Format categories into a hierarchical structure
const formatHierarchy = (items, level = 1, parentId = null) => {
return items
@@ -198,7 +306,12 @@ async function getTaxonomyData(pool) {
themes: formatThemes(themes),
colors: colors.map(c => [c.color, c.name]),
taxCodes: (taxCodes || []).map(tc => [tc.tax_code_id, tc.name]),
sizeCategories: (sizeCategories || []).map(sc => [sc.cat_id, sc.name])
sizeCategories: (sizeCategories || []).map(sc => [sc.cat_id, sc.name]),
suppliers: suppliers.map(s => [s.supplierid, s.name]),
companies: companies.map(c => [c.cat_id, c.name]),
artists: artists.map(a => [a.cat_id, a.name]),
lines: lines.map(l => [l.cat_id, l.name]),
subLines: subLines.map(sl => [sl.cat_id, sl.name])
};
cache.lastUpdated = Date.now();
@@ -206,18 +319,113 @@ async function getTaxonomyData(pool) {
}
// Load the prompt from file and inject taxonomy data
async function loadPrompt(pool) {
if (cache.validationPrompt && isCacheValid()) {
return cache.validationPrompt;
}
async function loadPrompt(pool, productsToValidate = null) {
const promptPath = path.join(__dirname, '..', 'prompts', 'product-validation.txt');
const basePrompt = await fs.readFile(promptPath, 'utf8');
// Get taxonomy data
const taxonomy = await getTaxonomyData(pool);
// Format taxonomy data for the prompt
// Add system instructions to the prompt
const systemInstructions = `You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone.
`;
// If we have products to validate, create a filtered prompt
if (productsToValidate) {
console.log('Creating filtered prompt for products:', productsToValidate);
// Extract unique values from products for non-core attributes
const uniqueValues = {
supplierIds: new Set(),
companyIds: new Set(),
artistIds: new Set(),
lineIds: new Set(),
subLineIds: new Set()
};
// Collect any values that exist in the products
productsToValidate.forEach(product => {
Object.entries(product).forEach(([key, value]) => {
if (value === undefined || value === null) return;
// Map field names to their respective sets
const fieldMap = {
supplierid: 'supplierIds',
supplier: 'supplierIds',
company: 'companyIds',
artist: 'artistIds',
line: 'lineIds',
subline: 'subLineIds'
};
if (fieldMap[key]) {
uniqueValues[fieldMap[key]].add(Number(value));
}
});
});
console.log('Unique values collected:', {
suppliers: Array.from(uniqueValues.supplierIds),
companies: Array.from(uniqueValues.companyIds),
artists: Array.from(uniqueValues.artistIds),
lines: Array.from(uniqueValues.lineIds),
subLines: Array.from(uniqueValues.subLineIds)
});
// Create mixed taxonomy with filtered non-core data and full core data
const mixedTaxonomy = {
// Keep full data for core attributes
categories: taxonomy.categories,
themes: taxonomy.themes,
colors: taxonomy.colors,
taxCodes: taxonomy.taxCodes,
sizeCategories: taxonomy.sizeCategories,
// For non-core data, only include items that are actually used
suppliers: taxonomy.suppliers.filter(([id]) => uniqueValues.supplierIds.has(Number(id))),
companies: taxonomy.companies.filter(([id]) => uniqueValues.companyIds.has(Number(id))),
artists: taxonomy.artists.filter(([id]) => uniqueValues.artistIds.has(Number(id))),
lines: taxonomy.lines.filter(([id]) => uniqueValues.lineIds.has(Number(id))),
subLines: taxonomy.subLines.filter(([id]) => uniqueValues.subLineIds.has(Number(id)))
};
console.log('Filtered taxonomy counts:', {
suppliers: mixedTaxonomy.suppliers.length,
companies: mixedTaxonomy.companies.length,
artists: mixedTaxonomy.artists.length,
lines: mixedTaxonomy.lines.length,
subLines: mixedTaxonomy.subLines.length
});
// Format taxonomy data for the prompt, only including sections with values
const taxonomySection = `
All Available Categories:
${JSON.stringify(mixedTaxonomy.categories)}
All Available Themes:
${JSON.stringify(mixedTaxonomy.themes)}
All Available Colors:
${JSON.stringify(mixedTaxonomy.colors)}
All Available Tax Codes:
${JSON.stringify(mixedTaxonomy.taxCodes)}
All Available Size Categories:
${JSON.stringify(mixedTaxonomy.sizeCategories)}${mixedTaxonomy.suppliers.length ? `\n\nSuppliers Used In This Data:\n${JSON.stringify(mixedTaxonomy.suppliers)}` : ''}${mixedTaxonomy.companies.length ? `\n\nCompanies Used In This Data:\n${JSON.stringify(mixedTaxonomy.companies)}` : ''}${mixedTaxonomy.artists.length ? `\n\nArtists Used In This Data:\n${JSON.stringify(mixedTaxonomy.artists)}` : ''}${mixedTaxonomy.lines.length ? `\n\nLines Used In This Data:\n${JSON.stringify(mixedTaxonomy.lines)}` : ''}${mixedTaxonomy.subLines.length ? `\n\nSub-Lines Used In This Data:\n${JSON.stringify(mixedTaxonomy.subLines)}` : ''}
----------Here is the product data to validate----------`;
// Return the filtered prompt without caching
return systemInstructions + basePrompt + '\n' + taxonomySection;
}
// For debug/display purposes, if no products provided and cache is valid, return cached prompt
if (!productsToValidate && cache.validationPrompt && isCacheValid()) {
return cache.validationPrompt;
}
// Generate and cache the full unfiltered prompt
const taxonomySection = `
Available Categories:
${JSON.stringify(taxonomy.categories)}
@@ -234,10 +442,22 @@ ${JSON.stringify(taxonomy.taxCodes)}
Available Size Categories:
${JSON.stringify(taxonomy.sizeCategories)}
Available Suppliers:
${JSON.stringify(taxonomy.suppliers)}
Available Companies:
${JSON.stringify(taxonomy.companies)}
Available Artists:
${JSON.stringify(taxonomy.artists)}
Available Shipping Restrictions:
${JSON.stringify(taxonomy.shippingRestrictions)}
Here is the product data to validate:`;
// Combine the prompt sections
cache.validationPrompt = basePrompt + '\n' + taxonomySection;
// Cache the full prompt only when no specific products are provided
cache.validationPrompt = systemInstructions + basePrompt + '\n' + taxonomySection;
cache.lastUpdated = Date.now();
return cache.validationPrompt;
@@ -256,19 +476,15 @@ router.post('/validate', async (req, res) => {
return res.status(400).json({ error: 'Products must be an array' });
}
// Load the prompt and append the products data
const basePrompt = await loadPrompt(req.app.locals.pool);
const fullPrompt = basePrompt + '\n' + JSON.stringify(products);
// Load the prompt with the products data to filter taxonomy
const prompt = await loadPrompt(req.app.locals.pool, products);
const fullPrompt = prompt + '\n' + JSON.stringify(products);
console.log('📝 Generated prompt:', fullPrompt);
console.log('🤖 Sending request to OpenAI...');
const completion = await openai.chat.completions.create({
model: "gpt-4-turbo-preview",
model: "gpt-4o",
messages: [
{
role: "system",
content: "You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone."
},
{
role: "user",
content: fullPrompt