Fix line/subline regressions, add AI validation performance tracking, and improve AI results dialog

2025-02-26 00:38:17 -05:00
parent 2df5428712
commit 6b101a91f6
5 changed files with 758 additions and 85 deletions
--- a/inventory-server/src/prompts/product-validation.txt
+++ b/inventory-server/src/prompts/product-validation.txt
@@ -7,6 +7,8 @@ Your response should be a JSON object with the following structure:
  "warnings": [] // Array of strings with warnings or suggestions for manual review (see below for details)
 }

+IMPORTANT: For all fields that use IDs (categories, supplier, company, line, subline, ship_restrictions, tax_cat, artist, themes, etc.), you MUST return the ID values, not the display names. The system will handle converting IDs to display names.
+
 Using the provided guidelines, focus on:
 1. Correcting typos and any incorrect spelling or grammar
 2. Standardizing product names
@@ -93,7 +95,7 @@ Instructions: Always return a valid numerical tax code ID from the Available Tax
 Fields: size_cat
 Changes: Allowed to correct obvious errors or inconsistencies or to add missing values
 Required: Return if present in the original data or if not present and applicable. Do not return if not applicable (e.g. if no size categories apply based on what you know about the product).
-Instructions: If present or if applicable, return one valid numerical size category ID from the Available Size Categories array below. Give preference to the value provided, but correct it if another value is more accurate. A value is not required if none of the size categories apply, but it's important to include if one clearly applies, such as if the name contains 12x12, 6x8, 2oz, etc.
+Instructions: If present or if applicable, return one valid numerical size category ID from the Available Size Categories array below. Give preference to the value provided, but correct it if another value is more accurate. If the product name contains a match for one of the size categories (such as 12x12, 6x6, 2oz, etc.), you MUST return that size category with the results. A value is not required if none of the size categories apply.

 Fields: themes
 Changes: Allowed to correct obvious errors or inconsistencies or to add missing values
--- a/inventory-server/src/routes/ai-validation.js
+++ b/inventory-server/src/routes/ai-validation.js
@@ -98,7 +98,51 @@ router.post("/debug", async (req, res) => {
    });

    try {
-      return await generateDebugResponse(cleanedProducts, res);
+      const debugResponse = await generateDebugResponse(cleanedProducts, res);
+      
+      // Get estimated processing time based on prompt length
+      if (debugResponse && debugResponse.promptLength) {
+        try {
+          // Use the pool from the app
+          const pool = req.app.locals.pool;
+          if (!pool) {
+            console.warn("⚠️ Local database pool not available for time estimates");
+            return;
+          }
+          
+          try {
+            const avgTimeResults = await pool.query(
+              `SELECT AVG(duration_seconds) as avg_duration, 
+                      COUNT(*) as sample_count
+               FROM ai_validation_performance 
+               WHERE prompt_length BETWEEN $1 * 0.8 AND $1 * 1.2`,
+              [debugResponse.promptLength]
+            );
+            
+            // Add estimated time to the response
+            if (avgTimeResults.rows && avgTimeResults.rows[0]) {
+              debugResponse.estimatedProcessingTime = {
+                seconds: avgTimeResults.rows[0].avg_duration || null,
+                sampleCount: avgTimeResults.rows[0].sample_count || 0
+              };
+              console.log("📊 Retrieved processing time estimate:", debugResponse.estimatedProcessingTime);
+            } else {
+              console.log("📊 No processing time estimates available for prompt length:", debugResponse.promptLength);
+            }
+          } catch (queryError) {
+            console.error("⚠️ Failed to query performance metrics:", queryError);
+            // Check if table doesn't exist and log a more helpful message
+            if (queryError.code === '42P01') {
+              console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script.");
+            }
+          }
+        } catch (timeEstimateError) {
+          console.error("Error getting time estimate:", timeEstimateError);
+          // Don't fail the request if time estimate fails
+        }
+      }
+      
+      return res.json(debugResponse);
    } catch (generateError) {
      console.error("Error generating debug response:", generateError);
      return res.status(500).json({
@@ -271,7 +315,7 @@ async function generateDebugResponse(productsToUse, res) {
      };

      console.log("Sending response with taxonomy stats:", response.taxonomyStats);
-      return res.json(response);
+      return response;
    } finally {
      if (promptConnection) await promptConnection.end();
      if (promptTunnel.ssh) promptTunnel.ssh.end();
@@ -463,9 +507,7 @@ async function loadPrompt(connection, productsToValidate = null) {
    const taxonomy = await getTaxonomyData(connection);

    // Add system instructions to the prompt
-    const systemInstructions = `You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone.
-
-`;
+    const systemInstructions = `You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone. You have meticulous attention to detail and are a master at your craft.`;

    // If we have products to validate, create a filtered prompt
    if (productsToValidate) {
@@ -634,6 +676,7 @@ Here is the product data to validate:`;
 router.post("/validate", async (req, res) => {
  try {
    const { products } = req.body;
+    const startTime = new Date(); // Track start time for performance metrics

    console.log("🔍 Received products for validation:", {
      isArray: Array.isArray(products),
@@ -654,6 +697,7 @@ router.post("/validate", async (req, res) => {

    let ssh = null;
    let connection = null;
+    let promptLength = 0; // Track prompt length for performance metrics
    
    try {
      // Setup MySQL connection via SSH tunnel
@@ -672,7 +716,8 @@ router.post("/validate", async (req, res) => {
      console.log("🔄 Loading prompt with filtered taxonomy...");
      const prompt = await loadPrompt(connection, products);
      const fullPrompt = prompt + "\n" + JSON.stringify(products);
-      console.log("📝 Generated prompt length:", fullPrompt.length);
+      promptLength = fullPrompt.length; // Store prompt length for performance metrics
+      console.log("📝 Generated prompt length:", promptLength);

      console.log("🤖 Sending request to OpenAI...");
      const completion = await openai.chat.completions.create({
@@ -698,17 +743,27 @@ router.post("/validate", async (req, res) => {
          Object.keys(aiResponse)
        );

+        // Create a detailed comparison between original and corrected data
+        const changeDetails = [];
+        
        // Compare original and corrected data
        if (aiResponse.correctedData) {
          console.log("📊 Changes summary:");
          products.forEach((original, index) => {
            const corrected = aiResponse.correctedData[index];
            if (corrected) {
+              const productChanges = {
+                productIndex: index,
+                title: original.title || `Product ${index + 1}`,
+                changes: []
+              };
+              
              const changes = Object.keys(corrected).filter(
                (key) =>
                  JSON.stringify(original[key]) !==
                  JSON.stringify(corrected[key])
              );
+              
              if (changes.length > 0) {
                console.log(`\nProduct ${index + 1} changes:`);
                changes.forEach((key) => {
@@ -719,14 +774,87 @@ router.post("/validate", async (req, res) => {
                  console.log(
                    `    - Corrected: ${JSON.stringify(corrected[key])}`
                  );
+                  
+                  // Add to our detailed changes array
+                  productChanges.changes.push({
+                    field: key,
+                    original: original[key],
+                    corrected: corrected[key]
+                  });
                });
+                
+                // Only add products that have changes
+                if (productChanges.changes.length > 0) {
+                  changeDetails.push(productChanges);
+                }
              }
            }
          });
        }

+        // Record performance metrics after successful validation
+        const endTime = new Date();
+        let performanceMetrics = {
+          promptLength,
+          productCount: products.length,
+          processingTimeSeconds: (endTime - startTime) / 1000
+        };
+        
+        try {
+          // Use the local PostgreSQL pool from the app instead of the MySQL connection
+          const pool = req.app.locals.pool;
+          if (!pool) {
+            console.warn("⚠️ Local database pool not available for recording metrics");
+            return;
+          }
+          
+          try {
+            // Insert performance data into the local PostgreSQL database
+            await pool.query(
+              `INSERT INTO ai_validation_performance 
+               (prompt_length, product_count, start_time, end_time) 
+               VALUES ($1, $2, $3, $4)`,
+              [promptLength, products.length, startTime, endTime]
+            );
+            
+            console.log("📊 Performance metrics inserted into database");
+            
+            // Query for average processing time based on similar prompt lengths
+            try {
+              const avgTimeResults = await pool.query(
+                `SELECT AVG(duration_seconds) as avg_duration, 
+                        COUNT(*) as sample_count
+                 FROM ai_validation_performance 
+                 WHERE prompt_length BETWEEN $1 * 0.8 AND $1 * 1.2`,
+                [promptLength]
+              );
+              
+              if (avgTimeResults.rows && avgTimeResults.rows[0]) {
+                performanceMetrics.avgDuration = avgTimeResults.rows[0].avg_duration;
+                performanceMetrics.sampleCount = avgTimeResults.rows[0].sample_count;
+              }
+              
+              console.log("📊 Performance metrics retrieved:", performanceMetrics);
+            } catch (queryError) {
+              console.error("⚠️ Failed to query performance metrics:", queryError);
+            }
+          } catch (insertError) {
+            console.error("⚠️ Failed to insert performance metrics:", insertError);
+            // Check if table doesn't exist and log a more helpful message
+            if (insertError.code === '42P01') {
+              console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script.");
+            }
+          }
+        } catch (metricError) {
+          // Don't fail the request if metrics recording fails
+          console.error("⚠️ Failed to record performance metrics:", metricError);
+        }
+        
+        // Include performance metrics in the response
        res.json({
          success: true,
+          changeDetails: changeDetails,
+          performanceMetrics,
          ...aiResponse,
        });
      } catch (parseError) {