Split off AI prompt into separate file, auto include taxonomy in prompt, create prompt debug page

This commit is contained in:
2025-02-21 11:50:46 -05:00
parent 7f7e6fdd1f
commit cff176e7a3
5 changed files with 475 additions and 28 deletions

View File

@@ -1,35 +1,253 @@
const express = require('express');
const router = express.Router();
const OpenAI = require('openai');
const fs = require('fs').promises;
const path = require('path');
// Shared OpenAI API client; the API key is read from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
// Helper function to create the prompt for product validation
function createValidationPrompt(products) {
return `You are a product data validation assistant. Please review the following product data and suggest corrections or improvements. Focus on:
1. Standardizing product names and descriptions
2. Fixing any obvious errors in measurements, prices, or quantities
3. Ensuring consistency in formatting
4. Flagging any suspicious or invalid values
// Cache configuration
const CACHE_TTL = 60 * 60 * 1000; // 1 hour in milliseconds
Here is the product data to validate:
${JSON.stringify(products, null, 2)}
// Cache structure with TTL
let cache = {
taxonomyData: null,
validationPrompt: null,
lastUpdated: null
};
Please respond with:
1. The corrected product data in the exact same JSON format
2. A list of changes made and why
3. Any warnings or suggestions for manual review
Respond in the following JSON format:
{
"correctedData": [], // Array of corrected products
"changes": [], // Array of changes made
"warnings": [] // Array of warnings or suggestions
}`;
/**
 * Function to check if cache is valid.
 *
 * The cache counts as valid only when `cache.lastUpdated` has been set and fewer than
 * `CACHE_TTL` milliseconds have elapsed since that timestamp.
 *
 * @returns {boolean} `true` while the cache is inside its TTL window, otherwise `false`.
 */
function isCacheValid() {
  // Answer with a real boolean: a bare `cache.lastUpdated && ...` hands `null` back to
  // callers (and into serialized JSON) when the cache has never been populated.
  if (!cache.lastUpdated) {
    return false;
  }
  return Date.now() - cache.lastUpdated < CACHE_TTL;
}
/**
 * Function to clear cache.
 *
 * Rebinds the module-level `cache` to a brand-new object whose taxonomy data, validation
 * prompt, and last-updated timestamp are all `null`.
 */
function clearCache() {
  const emptyCache = { taxonomyData: null, validationPrompt: null, lastUpdated: null };
  cache = emptyCache;
}
// Debug endpoint to view prompt and cache status.
// Responds with JSON holding the cache state, taxonomy item counts, the base prompt, and a
// sample full prompt built from a placeholder product. Any failure is logged and answered
// with HTTP 500 and the error message.
router.get('/debug', async (req, res) => {
  try {
    console.log('Debug endpoint called');
    const { pool } = req.app.locals;

    // Taxonomy goes first so its counts can be logged before the prompt is assembled
    console.log('Loading taxonomy data...');
    const taxonomy = await getTaxonomyData(pool);
    console.log('Taxonomy data loaded:', {
      categoriesCount: taxonomy.categories.length,
      themesCount: taxonomy.themes.length,
      colorsCount: taxonomy.colors.length,
      taxCodesCount: taxonomy.taxCodes.length,
      sizeCategoriesCount: taxonomy.sizeCategories.length
    });

    console.log('Loading prompt...');
    const currentPrompt = await loadPrompt(pool);

    // Placeholder product appended the same way a real validation request appends its data
    const sampleData = [{ name: "Sample Product" }];
    const fullPrompt = `${currentPrompt}\n${JSON.stringify(sampleData, null, 2)}`;

    // Cache status
    const cacheIsValid = isCacheValid();
    let lastUpdatedIso = null;
    let timeUntilExpiry = 'expired';
    if (cache.lastUpdated) {
      lastUpdatedIso = new Date(cache.lastUpdated).toISOString();
      const remainingMs = Math.max(0, CACHE_TTL - (Date.now() - cache.lastUpdated));
      timeUntilExpiry = `${remainingMs / 1000} seconds`;
    }

    // Taxonomy statistics: categories are counted recursively, the other lists are flat
    let taxonomyStats = null;
    if (taxonomy) {
      taxonomyStats = {
        categories: countItems(taxonomy.categories),
        themes: taxonomy.themes.length,
        colors: taxonomy.colors.length,
        taxCodes: taxonomy.taxCodes.length,
        sizeCategories: taxonomy.sizeCategories.length
      };
    }

    const response = {
      cacheStatus: {
        isCacheValid: cacheIsValid,
        lastUpdated: lastUpdatedIso,
        timeUntilExpiry,
      },
      taxonomyStats,
      basePrompt: currentPrompt,
      sampleFullPrompt: fullPrompt,
      promptLength: fullPrompt.length,
    };

    console.log('Sending response with stats:', response.taxonomyStats);
    res.json(response);
  } catch (error) {
    console.error('Debug endpoint error:', error);
    res.status(500).json({ error: error.message });
  }
});
/**
 * Helper function to count total items in hierarchical structure.
 *
 * Every entry counts as one, plus the recursive count of its `subcategories` when that
 * property is truthy.
 *
 * @param {Array<{subcategories?: Array}>} items - Nodes at the current level.
 * @returns {number} Total number of nodes at this level and all nested levels.
 */
function countItems(items) {
  let total = 0;
  items.forEach((node) => {
    total += 1;
    if (node.subcategories) {
      total += countItems(node.subcategories);
    }
  });
  return total;
}
// Force cache refresh endpoint.
// Empties the cache, reloads the prompt, and reports the new cache timestamp. Failures are
// logged and answered with HTTP 500 and the error message.
router.post('/refresh-cache', async (req, res) => {
  try {
    clearCache();

    // This will rebuild the cache
    await loadPrompt(req.app.locals.pool);

    const refreshedAt = new Date(cache.lastUpdated);
    res.json({
      success: true,
      message: 'Cache refreshed successfully',
      newCacheTime: refreshedAt.toISOString()
    });
  } catch (error) {
    console.error('Cache refresh error:', error);
    res.status(500).json({ error: error.message });
  }
});
/**
 * Function to fetch and format taxonomy data.
 *
 * Returns the cached taxonomy when one is present and the cache is still valid. Otherwise
 * runs five lookups (categories, themes, colors, tax codes, size categories), reshapes the
 * rows, stores the result in `cache.taxonomyData`, and stamps `cache.lastUpdated`.
 *
 * Category rows come from `product_categories` types 10, 11, 12 and 13, chained through
 * `master_cat_id` and tagged with `level_order` 1-4. Theme rows come from types 20 and 21
 * (levels 1-2). Size categories are type 50.
 *
 * @param {object} pool - Database handle whose `query(sql)` resolves to an array with the
 *   row list as its first element.
 * @returns {Promise<{
 *   categories: Array<{name: string, subcategories?: Array}>,
 *   themes: Array<{name: string, subthemes?: string[]}>,
 *   colors: string[],
 *   taxCodes: Array<{id: *, name: string}>,
 *   sizeCategories: Array<{id: *, name: string}>
 * }>} The formatted taxonomy (the same object that is kept in the cache).
 * @throws Whatever `pool.query` rejects with; the cache is left unchanged in that case.
 */
async function getTaxonomyData(pool) {
  if (cache.taxonomyData && isCacheValid()) {
    return cache.taxonomyData;
  }

  // Categories with hierarchy
  const categoriesSql = `
    SELECT cat_id, name, master_cat_id, level_order
    FROM (
    SELECT cat_id,name,NULL AS master_cat_id,1 AS level_order
    FROM product_categories s
    WHERE type=10
    UNION ALL
    SELECT c.cat_id,c.name,c.master_cat_id,2 AS level_order
    FROM product_categories c
    JOIN product_categories s ON c.master_cat_id=s.cat_id
    WHERE c.type=11 AND s.type=10
    UNION ALL
    SELECT sc.cat_id,sc.name,sc.master_cat_id,3 AS level_order
    FROM product_categories sc
    JOIN product_categories c ON sc.master_cat_id=c.cat_id
    JOIN product_categories s ON c.master_cat_id=s.cat_id
    WHERE sc.type=12 AND c.type=11 AND s.type=10
    UNION ALL
    SELECT ssc.cat_id,ssc.name,ssc.master_cat_id,4 AS level_order
    FROM product_categories ssc
    JOIN product_categories sc ON ssc.master_cat_id=sc.cat_id
    JOIN product_categories c ON sc.master_cat_id=c.cat_id
    JOIN product_categories s ON c.master_cat_id=s.cat_id
    WHERE ssc.type=13 AND sc.type=12 AND c.type=11 AND s.type=10
    ) AS hierarchy
    ORDER BY level_order,cat_id
  `;

  // Themes with hierarchy
  const themesSql = `
    SELECT cat_id, name, master_cat_id, level_order
    FROM (
    SELECT t.cat_id,t.name,null as master_cat_id,1 AS level_order
    FROM product_categories t
    WHERE t.type=20
    UNION ALL
    SELECT ts.cat_id,ts.name,ts.master_cat_id,2 AS level_order
    FROM product_categories ts
    JOIN product_categories t ON ts.master_cat_id=t.cat_id
    WHERE ts.type=21 AND t.type=20
    ) AS hierarchy
    ORDER BY level_order,name
  `;

  // The five lookups do not depend on each other, so issue them together rather than
  // waiting for each one in turn.
  const [[categories], [themes], [colors], [taxCodes], [sizeCategories]] = await Promise.all([
    pool.query(categoriesSql),
    pool.query(themesSql),
    pool.query('SELECT color, name FROM product_color_list ORDER BY name'),
    pool.query('SELECT tax_code_id, name FROM product_tax_codes ORDER BY name'),
    pool.query('SELECT cat_id, name FROM product_categories WHERE type=50 ORDER BY name'),
  ]);

  // Groups rows by a key while keeping the original row order inside every group
  const groupRows = (rows, keyOf) => {
    const groups = new Map();
    for (const row of rows) {
      const key = keyOf(row);
      const bucket = groups.get(key);
      if (bucket) {
        bucket.push(row);
      } else {
        groups.set(key, [row]);
      }
    }
    return groups;
  };

  // Format categories into a hierarchical structure.
  // Rows are indexed once by (level, parent id) so each node looks its children up directly
  // instead of rescanning the full row list.
  const formatHierarchy = (items) => {
    const byLevel = new Map();
    for (const [level, rows] of groupRows(items, (row) => row.level_order)) {
      byLevel.set(level, groupRows(rows, (row) => row.master_cat_id));
    }

    const buildLevel = (level, parentId) => {
      const parents = byLevel.get(level);
      const rows = (parents && parents.get(parentId)) || [];
      return rows.map((row) => {
        const children = buildLevel(level + 1, row.cat_id);
        return {
          name: row.name,
          ...(children.length > 0 ? { subcategories: children } : {})
        };
      });
    };

    // Top-level rows are level 1 with a NULL parent
    return buildLevel(1, null);
  };

  // Format themes similarly but with only two levels; subthemes are reduced to their names
  const formatThemes = (items) => {
    const byParent = groupRows(items, (row) => row.master_cat_id);
    return items
      .filter((item) => item.level_order === 1)
      .map((item) => {
        const subthemes = (byParent.get(item.cat_id) || []).map((subitem) => subitem.name);
        return {
          name: item.name,
          ...(subthemes.length > 0 ? { subthemes } : {})
        };
      });
  };

  // Every row list gets the same empty-list fallback so a missing result yields an empty
  // section instead of a TypeError.
  cache.taxonomyData = {
    categories: formatHierarchy(categories || []),
    themes: formatThemes(themes || []),
    colors: (colors || []).map((c) => c.name),
    taxCodes: (taxCodes || []).map((tc) => ({ id: tc.tax_code_id, name: tc.name })),
    sizeCategories: (sizeCategories || []).map((sc) => ({ id: sc.cat_id, name: sc.name }))
  };
  cache.lastUpdated = Date.now();
  return cache.taxonomyData;
}
/**
 * Load the prompt from file and inject taxonomy data.
 *
 * Returns the cached prompt when one is present and the cache is still valid. Otherwise
 * reads `../prompts/product-validation.txt` (relative to this module's directory), appends
 * one JSON-encoded section per taxonomy list followed by the product-data lead-in line,
 * stores the result in `cache.validationPrompt`, and stamps `cache.lastUpdated`.
 *
 * @param {object} pool - Database handle, passed through to `getTaxonomyData`.
 * @returns {Promise<string>} The assembled prompt text.
 */
async function loadPrompt(pool) {
  const cachedPrompt = cache.validationPrompt;
  if (cachedPrompt && isCacheValid()) {
    return cachedPrompt;
  }

  const promptFile = path.join(__dirname, '..', 'prompts', 'product-validation.txt');
  const promptText = await fs.readFile(promptFile, 'utf8');

  // Get taxonomy data
  const taxonomy = await getTaxonomyData(pool);

  // Each section is a heading line followed by the compact JSON of that taxonomy list
  const section = (heading, values) => `${heading}\n${JSON.stringify(values)}`;

  // The leading empty entry reproduces the blank line between the base prompt and the taxonomy
  const taxonomySection = [
    '',
    section('Available Categories:', taxonomy.categories),
    section('Available Themes:', taxonomy.themes),
    section('Available Colors:', taxonomy.colors),
    section('Available Tax Codes:', taxonomy.taxCodes),
    section('Available Size Categories:', taxonomy.sizeCategories),
    'Here is the product data to validate:',
  ].join('\n');

  // Combine the prompt sections
  cache.validationPrompt = `${promptText}\n${taxonomySection}`;
  cache.lastUpdated = Date.now();
  return cache.validationPrompt;
}
// Set up cache clearing interval: the cache is emptied every CACHE_TTL milliseconds.
// unref() stops this housekeeping timer from keeping the Node.js event loop alive on its own,
// so a script or test that merely loads this module can still exit when its work is done.
setInterval(clearCache, CACHE_TTL).unref();
router.post('/validate', async (req, res) => {
try {
const { products } = req.body;
@@ -40,20 +258,22 @@ router.post('/validate', async (req, res) => {
return res.status(400).json({ error: 'Products must be an array' });
}
const prompt = createValidationPrompt(products);
console.log('📝 Generated prompt:', prompt);
// Load the prompt and append the products data
const basePrompt = await loadPrompt(req.app.locals.pool);
const fullPrompt = basePrompt + '\n' + JSON.stringify(products, null, 2);
console.log('📝 Generated prompt:', fullPrompt);
console.log('🤖 Sending request to OpenAI...');
const completion = await openai.chat.completions.create({
model: "gpt-4o-mini",
model: "gpt-4-turbo-preview",
messages: [
{
role: "system",
content: "You are a product data validation assistant that helps ensure product data is accurate, consistent, and properly formatted."
content: "You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone."
},
{
role: "user",
content: prompt
content: fullPrompt
}
],
temperature: 0.3,