Split off AI prompt into separate file, auto include taxonomy in prompt, create prompt debug page
This commit is contained in:
17
inventory-server/src/prompts/product-validation.txt
Normal file
17
inventory-server/src/prompts/product-validation.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
I will provide a JSON array with product data. Process the array by combining all products from validData and invalidData arrays into a single array, excluding any fields starting with “__”, such as “__index” or “__errors”. Process each product according to the reference guidelines below. If a field is not included in the data, do not include it in your response unless the specific field guidelines below say otherwise.
|
||||
|
||||
Respond in the following JSON format:
|
||||
{
|
||||
"correctedData": [], // Array of corrected products
|
||||
"changes": [], // Array of strings describing each change made
|
||||
"warnings": [] // Array of strings with warnings or suggestions for manual review
|
||||
}
|
||||
|
||||
Using the provided guidelines, focus on:
|
||||
1. Correcting typos and any incorrect spelling or grammar
|
||||
2. Standardizing product names
|
||||
3. Correcting and enhancing descriptions by adding details, keywords, and SEO-friendly language
|
||||
4. Fixing any obvious errors in measurements, prices, or quantities
|
||||
5. Adding correct categories, themes, and colors
|
||||
|
||||
Use only the provided data and your own knowledge to make changes. Do not make assumptions or make up information that you're not sure about. If you're unable to make a change you're confident about, leave the field as is.
|
||||
@@ -1,35 +1,253 @@
|
||||
const express = require('express');
|
||||
const router = express.Router();
|
||||
const OpenAI = require('openai');
|
||||
const fs = require('fs').promises;
|
||||
const path = require('path');
|
||||
|
||||
const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY
|
||||
});
|
||||
|
||||
// Helper function to create the prompt for product validation
|
||||
function createValidationPrompt(products) {
|
||||
return `You are a product data validation assistant. Please review the following product data and suggest corrections or improvements. Focus on:
|
||||
1. Standardizing product names and descriptions
|
||||
2. Fixing any obvious errors in measurements, prices, or quantities
|
||||
3. Ensuring consistency in formatting
|
||||
4. Flagging any suspicious or invalid values
|
||||
// Cache configuration
|
||||
const CACHE_TTL = 60 * 60 * 1000; // 1 hour in milliseconds
|
||||
|
||||
Here is the product data to validate:
|
||||
${JSON.stringify(products, null, 2)}
|
||||
// Cache structure with TTL
|
||||
let cache = {
|
||||
taxonomyData: null,
|
||||
validationPrompt: null,
|
||||
lastUpdated: null
|
||||
};
|
||||
|
||||
Please respond with:
|
||||
1. The corrected product data in the exact same JSON format
|
||||
2. A list of changes made and why
|
||||
3. Any warnings or suggestions for manual review
|
||||
|
||||
Respond in the following JSON format:
|
||||
{
|
||||
"correctedData": [], // Array of corrected products
|
||||
"changes": [], // Array of changes made
|
||||
"warnings": [] // Array of warnings or suggestions
|
||||
}`;
|
||||
// Function to check if cache is valid
|
||||
function isCacheValid() {
|
||||
return cache.lastUpdated && (Date.now() - cache.lastUpdated) < CACHE_TTL;
|
||||
}
|
||||
|
||||
// Function to clear cache
|
||||
function clearCache() {
|
||||
cache = {
|
||||
taxonomyData: null,
|
||||
validationPrompt: null,
|
||||
lastUpdated: null
|
||||
};
|
||||
}
|
||||
|
||||
// Debug endpoint to view prompt and cache status
|
||||
router.get('/debug', async (req, res) => {
|
||||
try {
|
||||
console.log('Debug endpoint called');
|
||||
const pool = req.app.locals.pool;
|
||||
|
||||
// Load taxonomy data first
|
||||
console.log('Loading taxonomy data...');
|
||||
const taxonomy = await getTaxonomyData(pool);
|
||||
console.log('Taxonomy data loaded:', {
|
||||
categoriesCount: taxonomy.categories.length,
|
||||
themesCount: taxonomy.themes.length,
|
||||
colorsCount: taxonomy.colors.length,
|
||||
taxCodesCount: taxonomy.taxCodes.length,
|
||||
sizeCategoriesCount: taxonomy.sizeCategories.length
|
||||
});
|
||||
|
||||
// Then load the prompt
|
||||
console.log('Loading prompt...');
|
||||
const currentPrompt = await loadPrompt(pool);
|
||||
const sampleData = [{ name: "Sample Product" }];
|
||||
const fullPrompt = currentPrompt + '\n' + JSON.stringify(sampleData, null, 2);
|
||||
|
||||
const response = {
|
||||
cacheStatus: {
|
||||
isCacheValid: isCacheValid(),
|
||||
lastUpdated: cache.lastUpdated ? new Date(cache.lastUpdated).toISOString() : null,
|
||||
timeUntilExpiry: cache.lastUpdated ?
|
||||
Math.max(0, CACHE_TTL - (Date.now() - cache.lastUpdated)) / 1000 + ' seconds' :
|
||||
'expired',
|
||||
},
|
||||
taxonomyStats: taxonomy ? {
|
||||
categories: countItems(taxonomy.categories),
|
||||
themes: taxonomy.themes.length,
|
||||
colors: taxonomy.colors.length,
|
||||
taxCodes: taxonomy.taxCodes.length,
|
||||
sizeCategories: taxonomy.sizeCategories.length
|
||||
} : null,
|
||||
basePrompt: currentPrompt,
|
||||
sampleFullPrompt: fullPrompt,
|
||||
promptLength: fullPrompt.length,
|
||||
};
|
||||
|
||||
console.log('Sending response with stats:', response.taxonomyStats);
|
||||
res.json(response);
|
||||
} catch (error) {
|
||||
console.error('Debug endpoint error:', error);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
// Helper function to count total items in hierarchical structure
|
||||
function countItems(items) {
|
||||
return items.reduce((count, item) => {
|
||||
return count + 1 + (item.subcategories ? countItems(item.subcategories) : 0);
|
||||
}, 0);
|
||||
}
|
||||
|
||||
// Force cache refresh endpoint
|
||||
router.post('/refresh-cache', async (req, res) => {
|
||||
try {
|
||||
clearCache();
|
||||
const pool = req.app.locals.pool;
|
||||
await loadPrompt(pool); // This will rebuild the cache
|
||||
res.json({
|
||||
success: true,
|
||||
message: 'Cache refreshed successfully',
|
||||
newCacheTime: new Date(cache.lastUpdated).toISOString()
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Cache refresh error:', error);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
// Function to fetch and format taxonomy data
|
||||
async function getTaxonomyData(pool) {
|
||||
if (cache.taxonomyData && isCacheValid()) {
|
||||
return cache.taxonomyData;
|
||||
}
|
||||
|
||||
// Fetch categories with hierarchy
|
||||
const [categories] = await pool.query(`
|
||||
SELECT cat_id, name, master_cat_id, level_order
|
||||
FROM (
|
||||
SELECT cat_id,name,NULL AS master_cat_id,1 AS level_order
|
||||
FROM product_categories s
|
||||
WHERE type=10
|
||||
UNION ALL
|
||||
SELECT c.cat_id,c.name,c.master_cat_id,2 AS level_order
|
||||
FROM product_categories c
|
||||
JOIN product_categories s ON c.master_cat_id=s.cat_id
|
||||
WHERE c.type=11 AND s.type=10
|
||||
UNION ALL
|
||||
SELECT sc.cat_id,sc.name,sc.master_cat_id,3 AS level_order
|
||||
FROM product_categories sc
|
||||
JOIN product_categories c ON sc.master_cat_id=c.cat_id
|
||||
JOIN product_categories s ON c.master_cat_id=s.cat_id
|
||||
WHERE sc.type=12 AND c.type=11 AND s.type=10
|
||||
UNION ALL
|
||||
SELECT ssc.cat_id,ssc.name,ssc.master_cat_id,4 AS level_order
|
||||
FROM product_categories ssc
|
||||
JOIN product_categories sc ON ssc.master_cat_id=sc.cat_id
|
||||
JOIN product_categories c ON sc.master_cat_id=c.cat_id
|
||||
JOIN product_categories s ON c.master_cat_id=s.cat_id
|
||||
WHERE ssc.type=13 AND sc.type=12 AND c.type=11 AND s.type=10
|
||||
) AS hierarchy
|
||||
ORDER BY level_order,cat_id
|
||||
`);
|
||||
|
||||
// Fetch themes with hierarchy
|
||||
const [themes] = await pool.query(`
|
||||
SELECT cat_id, name, master_cat_id, level_order
|
||||
FROM (
|
||||
SELECT t.cat_id,t.name,null as master_cat_id,1 AS level_order
|
||||
FROM product_categories t
|
||||
WHERE t.type=20
|
||||
UNION ALL
|
||||
SELECT ts.cat_id,ts.name,ts.master_cat_id,2 AS level_order
|
||||
FROM product_categories ts
|
||||
JOIN product_categories t ON ts.master_cat_id=t.cat_id
|
||||
WHERE ts.type=21 AND t.type=20
|
||||
) AS hierarchy
|
||||
ORDER BY level_order,name
|
||||
`);
|
||||
|
||||
// Fetch colors
|
||||
const [colors] = await pool.query('SELECT color, name FROM product_color_list ORDER BY name');
|
||||
|
||||
// Fetch tax codes
|
||||
const [taxCodes] = await pool.query('SELECT tax_code_id, name FROM product_tax_codes ORDER BY name');
|
||||
|
||||
// Fetch size categories
|
||||
const [sizeCategories] = await pool.query('SELECT cat_id, name FROM product_categories WHERE type=50 ORDER BY name');
|
||||
|
||||
// Format categories into a hierarchical structure
|
||||
const formatHierarchy = (items, level = 1, parentId = null) => {
|
||||
return items
|
||||
.filter(item => item.level_order === level && item.master_cat_id === parentId)
|
||||
.map(item => {
|
||||
const children = formatHierarchy(items, level + 1, item.cat_id);
|
||||
return {
|
||||
name: item.name,
|
||||
...(children.length > 0 ? { subcategories: children } : {})
|
||||
};
|
||||
});
|
||||
};
|
||||
|
||||
// Format themes similarly but with only two levels
|
||||
const formatThemes = (items) => {
|
||||
return items
|
||||
.filter(item => item.level_order === 1)
|
||||
.map(item => {
|
||||
const subthemes = items
|
||||
.filter(subitem => subitem.master_cat_id === item.cat_id)
|
||||
.map(subitem => subitem.name);
|
||||
return {
|
||||
name: item.name,
|
||||
...(subthemes.length > 0 ? { subthemes } : {})
|
||||
};
|
||||
});
|
||||
};
|
||||
|
||||
cache.taxonomyData = {
|
||||
categories: formatHierarchy(categories),
|
||||
themes: formatThemes(themes),
|
||||
colors: colors.map(c => c.name),
|
||||
taxCodes: (taxCodes || []).map(tc => ({ id: tc.tax_code_id, name: tc.name })),
|
||||
sizeCategories: (sizeCategories || []).map(sc => ({ id: sc.cat_id, name: sc.name }))
|
||||
};
|
||||
cache.lastUpdated = Date.now();
|
||||
|
||||
return cache.taxonomyData;
|
||||
}
|
||||
|
||||
// Load the prompt from file and inject taxonomy data
|
||||
async function loadPrompt(pool) {
|
||||
if (cache.validationPrompt && isCacheValid()) {
|
||||
return cache.validationPrompt;
|
||||
}
|
||||
|
||||
const promptPath = path.join(__dirname, '..', 'prompts', 'product-validation.txt');
|
||||
const basePrompt = await fs.readFile(promptPath, 'utf8');
|
||||
|
||||
// Get taxonomy data
|
||||
const taxonomy = await getTaxonomyData(pool);
|
||||
|
||||
// Format taxonomy data for the prompt
|
||||
const taxonomySection = `
|
||||
Available Categories:
|
||||
${JSON.stringify(taxonomy.categories)}
|
||||
|
||||
Available Themes:
|
||||
${JSON.stringify(taxonomy.themes)}
|
||||
|
||||
Available Colors:
|
||||
${JSON.stringify(taxonomy.colors)}
|
||||
|
||||
Available Tax Codes:
|
||||
${JSON.stringify(taxonomy.taxCodes)}
|
||||
|
||||
Available Size Categories:
|
||||
${JSON.stringify(taxonomy.sizeCategories)}
|
||||
|
||||
Here is the product data to validate:`;
|
||||
|
||||
// Combine the prompt sections
|
||||
cache.validationPrompt = basePrompt + '\n' + taxonomySection;
|
||||
cache.lastUpdated = Date.now();
|
||||
|
||||
return cache.validationPrompt;
|
||||
}
|
||||
|
||||
// Set up cache clearing interval
|
||||
setInterval(clearCache, CACHE_TTL);
|
||||
|
||||
router.post('/validate', async (req, res) => {
|
||||
try {
|
||||
const { products } = req.body;
|
||||
@@ -40,20 +258,22 @@ router.post('/validate', async (req, res) => {
|
||||
return res.status(400).json({ error: 'Products must be an array' });
|
||||
}
|
||||
|
||||
const prompt = createValidationPrompt(products);
|
||||
console.log('📝 Generated prompt:', prompt);
|
||||
// Load the prompt and append the products data
|
||||
const basePrompt = await loadPrompt(req.app.locals.pool);
|
||||
const fullPrompt = basePrompt + '\n' + JSON.stringify(products, null, 2);
|
||||
console.log('📝 Generated prompt:', fullPrompt);
|
||||
|
||||
console.log('🤖 Sending request to OpenAI...');
|
||||
const completion = await openai.chat.completions.create({
|
||||
model: "gpt-4o-mini",
|
||||
model: "gpt-4-turbo-preview",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: "You are a product data validation assistant that helps ensure product data is accurate, consistent, and properly formatted."
|
||||
content: "You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone."
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: prompt
|
||||
content: fullPrompt
|
||||
}
|
||||
],
|
||||
temperature: 0.3,
|
||||
|
||||
Reference in New Issue
Block a user