Split off AI prompt into separate file, auto include taxonomy in prompt, create prompt debug page

2025-02-21 11:50:46 -05:00
parent 7f7e6fdd1f
commit cff176e7a3
5 changed files with 475 additions and 28 deletions
--- a/inventory-server/src/routes/ai-validation.js
+++ b/inventory-server/src/routes/ai-validation.js
@@ -1,35 +1,253 @@
 const express = require('express');
 const router = express.Router();
 const OpenAI = require('openai');
+const fs = require('fs').promises;
+const path = require('path');

 const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
 });

-// Helper function to create the prompt for product validation
-function createValidationPrompt(products) {
-  return `You are a product data validation assistant. Please review the following product data and suggest corrections or improvements. Focus on:
-1. Standardizing product names and descriptions
-2. Fixing any obvious errors in measurements, prices, or quantities
-3. Ensuring consistency in formatting
-4. Flagging any suspicious or invalid values
+// Cache configuration: maximum age of cached data, compared against cache.lastUpdated
+const CACHE_TTL = 60 * 60 * 1000; // 1 hour in milliseconds

-Here is the product data to validate:
-${JSON.stringify(products, null, 2)}
+// In-memory cache; every entry shares the single lastUpdated timestamp for TTL checks
+let cache = {
+  taxonomyData: null, // formatted taxonomy object built by getTaxonomyData()
+  validationPrompt: null, // prompt text assembled by loadPrompt()
+  lastUpdated: null // Date.now() value of the most recent cache write, or null when empty
+};

-Please respond with:
-1. The corrected product data in the exact same JSON format
-2. A list of changes made and why
-3. Any warnings or suggestions for manual review
-
-Respond in the following JSON format:
-{
-  "correctedData": [], // Array of corrected products
-  "changes": [], // Array of changes made
-  "warnings": [] // Array of warnings or suggestions
-}`;
+/**
+ * Reports whether the cached data is still within its time-to-live.
+ *
+ * Always returns a strict boolean, so callers that serialize the result
+ * (such as the /debug endpoint) report `false` rather than `null` while the
+ * cache is empty.
+ *
+ * @returns {boolean} true when the cache has a timestamp and that timestamp
+ *   is less than CACHE_TTL milliseconds old.
+ */
+function isCacheValid() {
+  if (!cache.lastUpdated) {
+    return false;
+  }
+  return Date.now() - cache.lastUpdated < CACHE_TTL;
 }

+/**
+ * Resets the module-level cache to its empty state by swapping in a fresh
+ * object whose fields are all null.
+ */
+function clearCache() {
+  const emptyCache = { taxonomyData: null, validationPrompt: null, lastUpdated: null };
+  cache = emptyCache;
+}
+
+/**
+ * GET /debug — returns the assembled validation prompt together with cache
+ * status and taxonomy statistics as JSON. Responds with HTTP 500 and the
+ * error message if loading the taxonomy or the prompt fails.
+ */
+router.get('/debug', async (req, res) => {
+  try {
+    console.log('Debug endpoint called');
+    const pool = req.app.locals.pool;
+
+    // Taxonomy is loaded (and logged) before the prompt is assembled
+    console.log('Loading taxonomy data...');
+    const taxonomy = await getTaxonomyData(pool);
+    const loadedCounts = {
+      categoriesCount: taxonomy.categories.length,
+      themesCount: taxonomy.themes.length,
+      colorsCount: taxonomy.colors.length,
+      taxCodesCount: taxonomy.taxCodes.length,
+      sizeCategoriesCount: taxonomy.sizeCategories.length
+    };
+    console.log('Taxonomy data loaded:', loadedCounts);
+
+    // Build a sample of what /validate would send, using a placeholder product
+    console.log('Loading prompt...');
+    const currentPrompt = await loadPrompt(pool);
+    const sampleData = [{ name: "Sample Product" }];
+    const fullPrompt = `${currentPrompt}\n${JSON.stringify(sampleData, null, 2)}`;
+
+    // Cache status: the timestamp fields fall back to null / 'expired' when nothing is cached
+    const cacheStatus = {
+      isCacheValid: isCacheValid(),
+      lastUpdated: null,
+      timeUntilExpiry: 'expired',
+    };
+    if (cache.lastUpdated) {
+      cacheStatus.lastUpdated = new Date(cache.lastUpdated).toISOString();
+      const remainingMs = Math.max(0, CACHE_TTL - (Date.now() - cache.lastUpdated));
+      cacheStatus.timeUntilExpiry = `${remainingMs / 1000} seconds`;
+    }
+
+    // Categories are counted recursively; the other lists are flat
+    let taxonomyStats = null;
+    if (taxonomy) {
+      taxonomyStats = {
+        categories: countItems(taxonomy.categories),
+        themes: taxonomy.themes.length,
+        colors: taxonomy.colors.length,
+        taxCodes: taxonomy.taxCodes.length,
+        sizeCategories: taxonomy.sizeCategories.length
+      };
+    }
+
+    const response = {
+      cacheStatus,
+      taxonomyStats,
+      basePrompt: currentPrompt,
+      sampleFullPrompt: fullPrompt,
+      promptLength: fullPrompt.length,
+    };
+
+    console.log('Sending response with stats:', response.taxonomyStats);
+    res.json(response);
+  } catch (error) {
+    console.error('Debug endpoint error:', error);
+    res.status(500).json({ error: error.message });
+  }
+});
+
+/**
+ * Counts every node in a category tree, including nested subcategories.
+ *
+ * @param {Array<{subcategories?: Array}>} items - Nodes at one level of the tree.
+ * @returns {number} Number of nodes at this level plus all levels beneath it.
+ */
+function countItems(items) {
+  let total = 0;
+  items.forEach((item) => {
+    total += 1;
+    if (item.subcategories) {
+      total += countItems(item.subcategories);
+    }
+  });
+  return total;
+}
+
+/**
+ * POST /refresh-cache — discards the cached taxonomy and prompt, rebuilds
+ * them immediately, and reports the new cache timestamp. Responds with
+ * HTTP 500 and the error message if the rebuild fails.
+ */
+router.post('/refresh-cache', async (req, res) => {
+  try {
+    clearCache();
+
+    // loadPrompt() repopulates the cache as a side effect
+    await loadPrompt(req.app.locals.pool);
+
+    const newCacheTime = new Date(cache.lastUpdated).toISOString();
+    res.json({
+      success: true,
+      message: 'Cache refreshed successfully',
+      newCacheTime
+    });
+  } catch (error) {
+    console.error('Cache refresh error:', error);
+    res.status(500).json({ error: error.message });
+  }
+});
+
+/**
+ * Fetches the product taxonomy from the database and formats it for use in
+ * the validation prompt. The result is cached; a cached copy is returned
+ * while the cache is still valid.
+ *
+ * Refreshing the taxonomy also discards any cached prompt, because that
+ * prompt embeds the previous taxonomy and would otherwise be revalidated by
+ * the fresh timestamp.
+ *
+ * @param {object} pool - Database pool whose `query(sql)` resolves to an
+ *   array with the result rows as its first element.
+ * @returns {Promise<{categories: Array, themes: Array, colors: Array,
+ *   taxCodes: Array<{id: *, name: *}>, sizeCategories: Array<{id: *, name: *}>}>}
+ *   Nested categories (`name` + optional `subcategories`), themes (`name` +
+ *   optional `subthemes` name list), color names, tax codes and size categories.
+ * @throws Propagates any rejection from `pool.query`.
+ */
+async function getTaxonomyData(pool) {
+  if (cache.taxonomyData && isCacheValid()) {
+    return cache.taxonomyData;
+  }
+
+  // The five lookups do not depend on each other, so run them concurrently
+  const [[categories], [themes], [colors], [taxCodes], [sizeCategories]] = await Promise.all([
+    // Categories with hierarchy (levels 1-4)
+    pool.query(`
+    SELECT cat_id, name, master_cat_id, level_order 
+    FROM (
+      SELECT cat_id,name,NULL AS master_cat_id,1 AS level_order 
+      FROM product_categories s 
+      WHERE type=10 
+      UNION ALL 
+      SELECT c.cat_id,c.name,c.master_cat_id,2 AS level_order 
+      FROM product_categories c 
+      JOIN product_categories s ON c.master_cat_id=s.cat_id 
+      WHERE c.type=11 AND s.type=10 
+      UNION ALL 
+      SELECT sc.cat_id,sc.name,sc.master_cat_id,3 AS level_order 
+      FROM product_categories sc 
+      JOIN product_categories c ON sc.master_cat_id=c.cat_id 
+      JOIN product_categories s ON c.master_cat_id=s.cat_id 
+      WHERE sc.type=12 AND c.type=11 AND s.type=10 
+      UNION ALL 
+      SELECT ssc.cat_id,ssc.name,ssc.master_cat_id,4 AS level_order 
+      FROM product_categories ssc 
+      JOIN product_categories sc ON ssc.master_cat_id=sc.cat_id 
+      JOIN product_categories c ON sc.master_cat_id=c.cat_id 
+      JOIN product_categories s ON c.master_cat_id=s.cat_id 
+      WHERE ssc.type=13 AND sc.type=12 AND c.type=11 AND s.type=10
+    ) AS hierarchy 
+    ORDER BY level_order,cat_id
+  `),
+    // Themes with hierarchy (levels 1-2)
+    pool.query(`
+    SELECT cat_id, name, master_cat_id, level_order 
+    FROM (
+      SELECT t.cat_id,t.name,null as master_cat_id,1 AS level_order 
+      FROM product_categories t 
+      WHERE t.type=20 
+      UNION ALL 
+      SELECT ts.cat_id,ts.name,ts.master_cat_id,2 AS level_order 
+      FROM product_categories ts 
+      JOIN product_categories t ON ts.master_cat_id=t.cat_id 
+      WHERE ts.type=21 AND t.type=20
+    ) AS hierarchy 
+    ORDER BY level_order,name
+  `),
+    // Colors
+    pool.query('SELECT color, name FROM product_color_list ORDER BY name'),
+    // Tax codes
+    pool.query('SELECT tax_code_id, name FROM product_tax_codes ORDER BY name'),
+    // Size categories
+    pool.query('SELECT cat_id, name FROM product_categories WHERE type=50 ORDER BY name')
+  ]);
+
+  // Appends `item` to the list stored under `key`, creating the list on first use
+  const pushInto = (map, key, item) => {
+    if (!map.has(key)) {
+      map.set(key, []);
+    }
+    map.get(key).push(item);
+  };
+
+  // Format categories into a hierarchical structure. Rows are indexed once by
+  // (level, parent id) so each node finds its children with a lookup instead
+  // of rescanning every row; row order within a group is preserved.
+  const formatHierarchy = (items) => {
+    const byLevel = new Map();
+    for (const item of items) {
+      if (!byLevel.has(item.level_order)) {
+        byLevel.set(item.level_order, new Map());
+      }
+      pushInto(byLevel.get(item.level_order), item.master_cat_id, item);
+    }
+
+    const build = (level, parentId) => {
+      const byParent = byLevel.get(level);
+      const siblings = (byParent && byParent.get(parentId)) || [];
+      return siblings.map(item => {
+        const children = build(level + 1, item.cat_id);
+        return {
+          name: item.name,
+          ...(children.length > 0 ? { subcategories: children } : {})
+        };
+      });
+    };
+
+    // Top-level rows have level_order 1 and a NULL parent
+    return build(1, null);
+  };
+
+  // Format themes similarly but with only two levels; subthemes are plain names
+  const formatThemes = (items) => {
+    const namesByParent = new Map();
+    for (const item of items) {
+      pushInto(namesByParent, item.master_cat_id, item.name);
+    }
+    return items
+      .filter(item => item.level_order === 1)
+      .map(item => {
+        const subthemes = namesByParent.get(item.cat_id) || [];
+        return {
+          name: item.name,
+          ...(subthemes.length > 0 ? { subthemes } : {})
+        };
+      });
+  };
+
+  cache.taxonomyData = {
+    categories: formatHierarchy(categories),
+    themes: formatThemes(themes),
+    colors: colors.map(c => c.name),
+    taxCodes: (taxCodes || []).map(tc => ({ id: tc.tax_code_id, name: tc.name })),
+    sizeCategories: (sizeCategories || []).map(sc => ({ id: sc.cat_id, name: sc.name }))
+  };
+  // A prompt assembled from the previous taxonomy must not be revalidated by
+  // the fresh timestamp written below, so drop it and let it be rebuilt
+  cache.validationPrompt = null;
+  cache.lastUpdated = Date.now();
+
+  return cache.taxonomyData;
+}
+
+/**
+ * Builds the validation prompt by reading the base prompt file and appending
+ * the current taxonomy as JSON. The result is cached; a cached copy is
+ * returned while the cache is still valid.
+ *
+ * @param {object} pool - Database pool, passed through to getTaxonomyData().
+ * @returns {Promise<string>} Prompt text ending with the line that introduces
+ *   the product data; callers append the products JSON themselves.
+ * @throws Propagates errors from reading the prompt file or loading taxonomy.
+ */
+async function loadPrompt(pool) {
+  if (cache.validationPrompt && isCacheValid()) {
+    return cache.validationPrompt;
+  }
+
+  const promptPath = path.join(__dirname, '..', 'prompts', 'product-validation.txt');
+
+  // The file read and the taxonomy lookup are independent, so run them concurrently
+  const [basePrompt, taxonomy] = await Promise.all([
+    fs.readFile(promptPath, 'utf8'),
+    getTaxonomyData(pool)
+  ]);
+
+  // Format taxonomy data for the prompt
+  const taxonomySection = `
+Available Categories:
+${JSON.stringify(taxonomy.categories)}
+
+Available Themes:
+${JSON.stringify(taxonomy.themes)}
+
+Available Colors:
+${JSON.stringify(taxonomy.colors)}
+
+Available Tax Codes:
+${JSON.stringify(taxonomy.taxCodes)}
+
+Available Size Categories:
+${JSON.stringify(taxonomy.sizeCategories)}
+
+Here is the product data to validate:`;
+
+  // Combine the prompt sections
+  cache.validationPrompt = basePrompt + '\n' + taxonomySection;
+
+  // Keep the timestamp written when the taxonomy was fetched, so the prompt
+  // never outlives the data it embeds. Stamp only if nothing has (for example
+  // the cache was cleared while this call was still in flight).
+  if (!cache.lastUpdated) {
+    cache.lastUpdated = Date.now();
+  }
+
+  return cache.validationPrompt;
+}
+
+// Periodically drop all cached data. unref() stops this timer from keeping
+// the Node.js process alive when nothing else is pending (tests, scripts).
+setInterval(clearCache, CACHE_TTL).unref();
+
 router.post('/validate', async (req, res) => {
  try {
    const { products } = req.body;
@@ -40,20 +258,22 @@ router.post('/validate', async (req, res) => {
      return res.status(400).json({ error: 'Products must be an array' });
    }

-    const prompt = createValidationPrompt(products);
-    console.log('📝 Generated prompt:', prompt);
+    // Load the prompt and append the products data
+    const basePrompt = await loadPrompt(req.app.locals.pool);
+    const fullPrompt = basePrompt + '\n' + JSON.stringify(products, null, 2);
+    console.log('📝 Generated prompt:', fullPrompt);

    console.log('🤖 Sending request to OpenAI...');
    const completion = await openai.chat.completions.create({
-      model: "gpt-4o-mini",
+      model: "gpt-4-turbo-preview",
      messages: [
        {
          role: "system",
-          content: "You are a product data validation assistant that helps ensure product data is accurate, consistent, and properly formatted."
+          content: "You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone."
        },
        {
          role: "user",
-          content: prompt
+          content: fullPrompt
        }
      ],
      temperature: 0.3,