Enhance AI validation with progress tracking and prompt debugging
This commit is contained in:
@@ -38,51 +38,119 @@ router.get('/debug', async (req, res) => {
|
||||
console.log('Debug endpoint called');
|
||||
const pool = req.app.locals.pool;
|
||||
|
||||
// Load taxonomy data first
|
||||
console.log('Loading taxonomy data...');
|
||||
const taxonomy = await getTaxonomyData(pool);
|
||||
console.log('Taxonomy data loaded:', {
|
||||
categoriesCount: taxonomy.categories.length,
|
||||
themesCount: taxonomy.themes.length,
|
||||
colorsCount: taxonomy.colors.length,
|
||||
taxCodesCount: taxonomy.taxCodes.length,
|
||||
sizeCategoriesCount: taxonomy.sizeCategories.length
|
||||
});
|
||||
|
||||
// Then load the prompt
|
||||
console.log('Loading prompt...');
|
||||
const currentPrompt = await loadPrompt(pool);
|
||||
const sampleData = [{ name: "Sample Product" }];
|
||||
const fullPrompt = currentPrompt + '\n' + JSON.stringify(sampleData);
|
||||
|
||||
const response = {
|
||||
cacheStatus: {
|
||||
isCacheValid: isCacheValid(),
|
||||
lastUpdated: cache.lastUpdated ? new Date(cache.lastUpdated).toISOString() : null,
|
||||
timeUntilExpiry: cache.lastUpdated ?
|
||||
Math.max(0, CACHE_TTL - (Date.now() - cache.lastUpdated)) / 1000 + ' seconds' :
|
||||
'expired',
|
||||
},
|
||||
taxonomyStats: taxonomy ? {
|
||||
categories: countItems(taxonomy.categories),
|
||||
themes: taxonomy.themes.length,
|
||||
colors: taxonomy.colors.length,
|
||||
taxCodes: taxonomy.taxCodes.length,
|
||||
sizeCategories: taxonomy.sizeCategories.length
|
||||
} : null,
|
||||
basePrompt: currentPrompt,
|
||||
sampleFullPrompt: fullPrompt,
|
||||
promptLength: fullPrompt.length,
|
||||
};
|
||||
|
||||
console.log('Sending response with stats:', response.taxonomyStats);
|
||||
res.json(response);
|
||||
// Get a real supplier, company, and artist ID from the database
|
||||
const [suppliers] = await pool.query('SELECT supplierid FROM suppliers LIMIT 1');
|
||||
const [companies] = await pool.query('SELECT cat_id FROM product_categories WHERE type = 1 LIMIT 1');
|
||||
const [artists] = await pool.query('SELECT cat_id FROM product_categories WHERE type = 40 LIMIT 1');
|
||||
|
||||
// Create a sample product with real IDs
|
||||
const productsToUse = [{
|
||||
supplierid: suppliers[0]?.supplierid || 1234,
|
||||
company: companies[0]?.cat_id || 567,
|
||||
artist: artists[0]?.cat_id || 890
|
||||
}];
|
||||
|
||||
return await generateDebugResponse(pool, productsToUse, res);
|
||||
} catch (error) {
|
||||
console.error('Debug endpoint error:', error);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
// New POST endpoint for debug with products
|
||||
/**
 * POST /debug — generate the debug response for caller-supplied products.
 *
 * Expects `req.body.products` to be a non-empty array of product objects.
 * Responds with 400 when the body is missing, `products` is not an array,
 * the array is empty, or an entry is not an object; responds with 500 (and
 * the error message) when anything downstream throws. The internal
 * `__errors` and `__index` fields are removed from every product before the
 * list is handed to `generateDebugResponse`.
 */
router.post('/debug', async (req, res) => {
  try {
    console.log('Debug POST endpoint called');
    const pool = req.app.locals.pool;
    // A request without a parsed body is a client error, not a server crash.
    const { products } = req.body ?? {};

    console.log('Received products:', {
      isArray: Array.isArray(products),
      length: products?.length,
      firstProduct: products?.[0],
      lastProduct: products?.[products?.length - 1]
    });

    if (!Array.isArray(products)) {
      console.error('Invalid input: products is not an array');
      return res.status(400).json({ error: 'Products must be an array' });
    }

    if (products.length === 0) {
      console.error('Invalid input: products array is empty');
      return res.status(400).json({ error: 'Products array cannot be empty' });
    }

    // Destructuring a null entry below would throw and be reported as a 500;
    // reject malformed entries explicitly so the caller gets a 400 instead.
    const invalidIndex = products.findIndex(
      (product) => product === null || typeof product !== 'object'
    );
    if (invalidIndex !== -1) {
      console.error(`Invalid input: product at index ${invalidIndex} is not an object`);
      return res.status(400).json({ error: `Product at index ${invalidIndex} must be an object` });
    }

    // Clean the products array to remove any internal fields
    const cleanedProducts = products.map(({ __errors, __index, ...cleanProduct }) => cleanProduct);

    return await generateDebugResponse(pool, cleanedProducts, res);
  } catch (error) {
    console.error('Debug POST endpoint error:', error);
    res.status(500).json({ error: error.message });
  }
});
|
||||
|
||||
// Helper function to generate debug response
|
||||
/**
 * Build the debug payload for a set of products and send it as JSON.
 *
 * Loads the taxonomy via `getTaxonomyData(pool)`, then the prompt via
 * `loadPrompt(pool, productsToUse)`, and responds with cache status,
 * taxonomy counts, the prompt, and the prompt with the JSON-serialized
 * products appended.
 *
 * @param {object} pool - Database pool, passed through to the loaders.
 * @param {?Array<object>} productsToUse - Products used to build the prompt;
 *   when falsy, `taxonomyStats.filtered` is `null`.
 * @param {object} res - Response object; only `res.json` is called.
 * @returns {Promise<*>} Whatever `res.json(response)` returns.
 */
async function generateDebugResponse(pool, productsToUse, res) {
  // Load taxonomy data first
  console.log('Loading taxonomy data...');
  const taxonomy = await getTaxonomyData(pool);
  console.log('Taxonomy data loaded:', {
    categoriesCount: taxonomy.categories.length,
    themesCount: taxonomy.themes.length,
    colorsCount: taxonomy.colors.length,
    taxCodesCount: taxonomy.taxCodes.length,
    sizeCategoriesCount: taxonomy.sizeCategories.length,
    suppliersCount: taxonomy.suppliers.length,
    companiesCount: taxonomy.companies.length,
    artistsCount: taxonomy.artists.length
  });

  // Load the prompt using the same function used by validation
  console.log('Loading prompt...');
  const prompt = await loadPrompt(pool, productsToUse);
  const fullPrompt = prompt + '\n' + JSON.stringify(productsToUse);

  // Counts the [id, name] taxonomy entries whose id is referenced by `field`
  // on at least one product. The ids are collected into a Set once so each
  // entry is a constant-time lookup rather than a scan over all products.
  const countReferenced = (entries, field) => {
    const referencedIds = new Set();
    for (const product of productsToUse) {
      const id = Number(product?.[field]);
      // Set#has matches NaN against NaN, whereas `===` never does; leave NaN
      // out so unparsable ids keep matching nothing.
      if (!Number.isNaN(id)) {
        referencedIds.add(id);
      }
    }
    return entries.filter(([id]) => referencedIds.has(Number(id))).length;
  };

  const response = {
    cacheStatus: {
      isCacheValid: isCacheValid(),
      lastUpdated: cache.lastUpdated ? new Date(cache.lastUpdated).toISOString() : null,
      timeUntilExpiry: cache.lastUpdated ?
        Math.max(0, CACHE_TTL - (Date.now() - cache.lastUpdated)) / 1000 + ' seconds' :
        'expired',
    },
    // `taxonomy` has already been dereferenced above, so it is known to be
    // non-null here and needs no null fallback.
    taxonomyStats: {
      categories: countItems(taxonomy.categories),
      themes: taxonomy.themes.length,
      colors: taxonomy.colors.length,
      taxCodes: taxonomy.taxCodes.length,
      sizeCategories: taxonomy.sizeCategories.length,
      suppliers: taxonomy.suppliers.length,
      companies: taxonomy.companies.length,
      artists: taxonomy.artists.length,
      // Add filtered counts when products are provided
      filtered: productsToUse ? {
        suppliers: countReferenced(taxonomy.suppliers, 'supplierid'),
        companies: countReferenced(taxonomy.companies, 'company'),
        artists: countReferenced(taxonomy.artists, 'artist')
      } : null
    },
    basePrompt: prompt,
    sampleFullPrompt: fullPrompt,
    promptLength: fullPrompt.length,
  };

  console.log('Sending response with stats:', response.taxonomyStats);
  return res.json(response);
}
|
||||
|
||||
// Helper function to count total items in hierarchical structure
|
||||
function countItems(items) {
|
||||
return items.reduce((count, item) => {
|
||||
@@ -167,6 +235,46 @@ async function getTaxonomyData(pool) {
|
||||
// Fetch size categories
|
||||
const [sizeCategories] = await pool.query('SELECT cat_id, name FROM product_categories WHERE type=50 ORDER BY name');
|
||||
|
||||
// Fetch suppliers
|
||||
const [suppliers] = await pool.query(`
|
||||
SELECT supplierid, companyname as name
|
||||
FROM suppliers
|
||||
WHERE companyname <> ''
|
||||
ORDER BY companyname
|
||||
`);
|
||||
|
||||
// Fetch companies (type 1)
|
||||
const [companies] = await pool.query(`
|
||||
SELECT cat_id, name
|
||||
FROM product_categories
|
||||
WHERE type = 1
|
||||
ORDER BY name
|
||||
`);
|
||||
|
||||
// Fetch artists (type 40)
|
||||
const [artists] = await pool.query(`
|
||||
SELECT cat_id, name
|
||||
FROM product_categories
|
||||
WHERE type = 40
|
||||
ORDER BY name
|
||||
`);
|
||||
|
||||
// Fetch lines (type 2)
|
||||
const [lines] = await pool.query(`
|
||||
SELECT cat_id, name
|
||||
FROM product_categories
|
||||
WHERE type = 2
|
||||
ORDER BY name
|
||||
`);
|
||||
|
||||
// Fetch sub-lines (type 3)
|
||||
const [subLines] = await pool.query(`
|
||||
SELECT cat_id, name
|
||||
FROM product_categories
|
||||
WHERE type = 3
|
||||
ORDER BY name
|
||||
`);
|
||||
|
||||
// Format categories into a hierarchical structure
|
||||
const formatHierarchy = (items, level = 1, parentId = null) => {
|
||||
return items
|
||||
@@ -198,7 +306,12 @@ async function getTaxonomyData(pool) {
|
||||
themes: formatThemes(themes),
|
||||
colors: colors.map(c => [c.color, c.name]),
|
||||
taxCodes: (taxCodes || []).map(tc => [tc.tax_code_id, tc.name]),
|
||||
sizeCategories: (sizeCategories || []).map(sc => [sc.cat_id, sc.name])
|
||||
sizeCategories: (sizeCategories || []).map(sc => [sc.cat_id, sc.name]),
|
||||
suppliers: suppliers.map(s => [s.supplierid, s.name]),
|
||||
companies: companies.map(c => [c.cat_id, c.name]),
|
||||
artists: artists.map(a => [a.cat_id, a.name]),
|
||||
lines: lines.map(l => [l.cat_id, l.name]),
|
||||
subLines: subLines.map(sl => [sl.cat_id, sl.name])
|
||||
};
|
||||
cache.lastUpdated = Date.now();
|
||||
|
||||
@@ -206,18 +319,113 @@ async function getTaxonomyData(pool) {
|
||||
}
|
||||
|
||||
// Load the prompt from file and inject taxonomy data
|
||||
async function loadPrompt(pool) {
|
||||
if (cache.validationPrompt && isCacheValid()) {
|
||||
return cache.validationPrompt;
|
||||
}
|
||||
|
||||
async function loadPrompt(pool, productsToValidate = null) {
|
||||
const promptPath = path.join(__dirname, '..', 'prompts', 'product-validation.txt');
|
||||
const basePrompt = await fs.readFile(promptPath, 'utf8');
|
||||
|
||||
// Get taxonomy data
|
||||
const taxonomy = await getTaxonomyData(pool);
|
||||
|
||||
// Format taxonomy data for the prompt
|
||||
|
||||
// Add system instructions to the prompt
|
||||
const systemInstructions = `You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone.
|
||||
|
||||
`;
|
||||
|
||||
// If we have products to validate, create a filtered prompt
|
||||
if (productsToValidate) {
|
||||
console.log('Creating filtered prompt for products:', productsToValidate);
|
||||
|
||||
// Extract unique values from products for non-core attributes
|
||||
const uniqueValues = {
|
||||
supplierIds: new Set(),
|
||||
companyIds: new Set(),
|
||||
artistIds: new Set(),
|
||||
lineIds: new Set(),
|
||||
subLineIds: new Set()
|
||||
};
|
||||
|
||||
// Collect any values that exist in the products
|
||||
productsToValidate.forEach(product => {
|
||||
Object.entries(product).forEach(([key, value]) => {
|
||||
if (value === undefined || value === null) return;
|
||||
|
||||
// Map field names to their respective sets
|
||||
const fieldMap = {
|
||||
supplierid: 'supplierIds',
|
||||
supplier: 'supplierIds',
|
||||
company: 'companyIds',
|
||||
artist: 'artistIds',
|
||||
line: 'lineIds',
|
||||
subline: 'subLineIds'
|
||||
};
|
||||
|
||||
if (fieldMap[key]) {
|
||||
uniqueValues[fieldMap[key]].add(Number(value));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
console.log('Unique values collected:', {
|
||||
suppliers: Array.from(uniqueValues.supplierIds),
|
||||
companies: Array.from(uniqueValues.companyIds),
|
||||
artists: Array.from(uniqueValues.artistIds),
|
||||
lines: Array.from(uniqueValues.lineIds),
|
||||
subLines: Array.from(uniqueValues.subLineIds)
|
||||
});
|
||||
|
||||
// Create mixed taxonomy with filtered non-core data and full core data
|
||||
const mixedTaxonomy = {
|
||||
// Keep full data for core attributes
|
||||
categories: taxonomy.categories,
|
||||
themes: taxonomy.themes,
|
||||
colors: taxonomy.colors,
|
||||
taxCodes: taxonomy.taxCodes,
|
||||
sizeCategories: taxonomy.sizeCategories,
|
||||
// For non-core data, only include items that are actually used
|
||||
suppliers: taxonomy.suppliers.filter(([id]) => uniqueValues.supplierIds.has(Number(id))),
|
||||
companies: taxonomy.companies.filter(([id]) => uniqueValues.companyIds.has(Number(id))),
|
||||
artists: taxonomy.artists.filter(([id]) => uniqueValues.artistIds.has(Number(id))),
|
||||
lines: taxonomy.lines.filter(([id]) => uniqueValues.lineIds.has(Number(id))),
|
||||
subLines: taxonomy.subLines.filter(([id]) => uniqueValues.subLineIds.has(Number(id)))
|
||||
};
|
||||
|
||||
console.log('Filtered taxonomy counts:', {
|
||||
suppliers: mixedTaxonomy.suppliers.length,
|
||||
companies: mixedTaxonomy.companies.length,
|
||||
artists: mixedTaxonomy.artists.length,
|
||||
lines: mixedTaxonomy.lines.length,
|
||||
subLines: mixedTaxonomy.subLines.length
|
||||
});
|
||||
|
||||
// Format taxonomy data for the prompt, only including sections with values
|
||||
const taxonomySection = `
|
||||
All Available Categories:
|
||||
${JSON.stringify(mixedTaxonomy.categories)}
|
||||
|
||||
All Available Themes:
|
||||
${JSON.stringify(mixedTaxonomy.themes)}
|
||||
|
||||
All Available Colors:
|
||||
${JSON.stringify(mixedTaxonomy.colors)}
|
||||
|
||||
All Available Tax Codes:
|
||||
${JSON.stringify(mixedTaxonomy.taxCodes)}
|
||||
|
||||
All Available Size Categories:
|
||||
${JSON.stringify(mixedTaxonomy.sizeCategories)}${mixedTaxonomy.suppliers.length ? `\n\nSuppliers Used In This Data:\n${JSON.stringify(mixedTaxonomy.suppliers)}` : ''}${mixedTaxonomy.companies.length ? `\n\nCompanies Used In This Data:\n${JSON.stringify(mixedTaxonomy.companies)}` : ''}${mixedTaxonomy.artists.length ? `\n\nArtists Used In This Data:\n${JSON.stringify(mixedTaxonomy.artists)}` : ''}${mixedTaxonomy.lines.length ? `\n\nLines Used In This Data:\n${JSON.stringify(mixedTaxonomy.lines)}` : ''}${mixedTaxonomy.subLines.length ? `\n\nSub-Lines Used In This Data:\n${JSON.stringify(mixedTaxonomy.subLines)}` : ''}
|
||||
|
||||
----------Here is the product data to validate----------`;
|
||||
|
||||
// Return the filtered prompt without caching
|
||||
return systemInstructions + basePrompt + '\n' + taxonomySection;
|
||||
}
|
||||
|
||||
// For debug/display purposes, if no products provided and cache is valid, return cached prompt
|
||||
if (!productsToValidate && cache.validationPrompt && isCacheValid()) {
|
||||
return cache.validationPrompt;
|
||||
}
|
||||
|
||||
// Generate and cache the full unfiltered prompt
|
||||
const taxonomySection = `
|
||||
Available Categories:
|
||||
${JSON.stringify(taxonomy.categories)}
|
||||
@@ -234,10 +442,22 @@ ${JSON.stringify(taxonomy.taxCodes)}
|
||||
Available Size Categories:
|
||||
${JSON.stringify(taxonomy.sizeCategories)}
|
||||
|
||||
Available Suppliers:
|
||||
${JSON.stringify(taxonomy.suppliers)}
|
||||
|
||||
Available Companies:
|
||||
${JSON.stringify(taxonomy.companies)}
|
||||
|
||||
Available Artists:
|
||||
${JSON.stringify(taxonomy.artists)}
|
||||
|
||||
Available Shipping Restrictions:
|
||||
${JSON.stringify(taxonomy.shippingRestrictions)}
|
||||
|
||||
Here is the product data to validate:`;
|
||||
|
||||
// Combine the prompt sections
|
||||
cache.validationPrompt = basePrompt + '\n' + taxonomySection;
|
||||
// Cache the full prompt only when no specific products are provided
|
||||
cache.validationPrompt = systemInstructions + basePrompt + '\n' + taxonomySection;
|
||||
cache.lastUpdated = Date.now();
|
||||
|
||||
return cache.validationPrompt;
|
||||
@@ -256,19 +476,15 @@ router.post('/validate', async (req, res) => {
|
||||
return res.status(400).json({ error: 'Products must be an array' });
|
||||
}
|
||||
|
||||
// Load the prompt and append the products data
|
||||
const basePrompt = await loadPrompt(req.app.locals.pool);
|
||||
const fullPrompt = basePrompt + '\n' + JSON.stringify(products);
|
||||
// Load the prompt with the products data to filter taxonomy
|
||||
const prompt = await loadPrompt(req.app.locals.pool, products);
|
||||
const fullPrompt = prompt + '\n' + JSON.stringify(products);
|
||||
console.log('📝 Generated prompt:', fullPrompt);
|
||||
|
||||
console.log('🤖 Sending request to OpenAI...');
|
||||
const completion = await openai.chat.completions.create({
|
||||
model: "gpt-4-turbo-preview",
|
||||
model: "gpt-4o",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: "You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone."
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: fullPrompt
|
||||
|
||||
Reference in New Issue
Block a user