const express = require("express"); const router = express.Router(); const OpenAI = require("openai"); const fs = require("fs").promises; const path = require("path"); const dotenv = require("dotenv"); const mysql = require('mysql2/promise'); const { Client } = require('ssh2'); const { getDbConnection, closeAllConnections } = require('../utils/dbConnection'); // Import the optimized connection function // Ensure environment variables are loaded dotenv.config({ path: path.join(__dirname, "../../.env") }); const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, }); if (!process.env.OPENAI_API_KEY) { console.error("Warning: OPENAI_API_KEY is not set in environment variables"); } async function createResponsesCompletion(payload) { if (!openai.responses?.create) { throw new Error( "OpenAI client does not expose responses.create; please verify the openai SDK version." ); } return openai.responses.create(payload); } const AI_VALIDATION_SCHEMA_NAME = "ai_validation_response"; const FLEXIBLE_PRIMITIVE_SCHEMAS = [ { type: "string" }, { type: "number" }, { type: "boolean" }, { type: "null" }, ]; const FLEXIBLE_ARRAY_SCHEMA = { type: "array", items: { anyOf: FLEXIBLE_PRIMITIVE_SCHEMAS, }, }; const FLEXIBLE_OBJECT_SCHEMA = { type: "object", properties: {}, patternProperties: { ".+": { anyOf: [...FLEXIBLE_PRIMITIVE_SCHEMAS, FLEXIBLE_ARRAY_SCHEMA], }, }, additionalProperties: false, }; const FLEXIBLE_VALUE_SCHEMA = { anyOf: [...FLEXIBLE_PRIMITIVE_SCHEMAS, FLEXIBLE_ARRAY_SCHEMA, FLEXIBLE_OBJECT_SCHEMA], }; const AI_VALIDATION_JSON_SCHEMA = { type: "object", additionalProperties: false, required: [ "correctedData", "changes", "warnings", "summary", "metadata" ], properties: { correctedData: { type: "array", items: { type: "object", properties: {}, patternProperties: { ".+": FLEXIBLE_VALUE_SCHEMA, }, additionalProperties: false, }, }, changes: { type: "array", items: { type: "string", }, default: [], }, warnings: { type: "array", items: { type: "string", }, default: [], }, summary: { type: "string", default: "", }, metadata: { type: "object", properties: {}, patternProperties: { ".+": FLEXIBLE_VALUE_SCHEMA, }, additionalProperties: false, }, }, }; const AI_VALIDATION_TEXT_FORMAT = { type: "json_schema", name: AI_VALIDATION_SCHEMA_NAME, strict: true, schema: AI_VALIDATION_JSON_SCHEMA, }; // Debug endpoint for viewing prompt router.post("/debug", async (req, res) => { try { console.log("Debug POST endpoint called"); const { products } = req.body; console.log("Received products for debug:", { isArray: Array.isArray(products), length: products?.length, firstProduct: products?.[0], lastProduct: products?.[products?.length - 1], }); if (!Array.isArray(products)) { console.error("Invalid input: products is not an array"); return res.status(400).json({ error: "Products must be an array" }); } if (products.length === 0) { console.error("Invalid input: products array is empty"); return res.status(400).json({ error: "Products array cannot be empty" }); } // Clean the products array to remove any internal fields const cleanedProducts = products.map((product) => { const { __errors, __index, ...cleanProduct } = product; return cleanProduct; }); console.log("Processing debug request with cleaned products:", { length: cleanedProducts.length, sample: cleanedProducts[0], }); try { const debugResponse = await generateDebugResponse(cleanedProducts, res); // Get estimated processing time based on prompt length if (debugResponse && debugResponse.promptLength) { try { // Use the pool from the app const pool = req.app.locals.pool; if (!pool) { console.warn("⚠️ Local database pool not available for time estimates"); return; } try { // Instead of looking for similar prompt lengths, calculate an average processing rate const rateResults = await pool.query( `SELECT AVG(duration_seconds / prompt_length) as avg_rate_per_char, COUNT(*) as sample_count, AVG(duration_seconds) as avg_duration FROM ai_validation_performance` ); // Add estimated time to the response if (rateResults.rows && rateResults.rows[0] && rateResults.rows[0].avg_rate_per_char) { // Calculate estimated time based on the rate and current prompt length const rate = rateResults.rows[0].avg_rate_per_char; const estimatedSeconds = Math.max(15, Math.round(rate * debugResponse.promptLength)); debugResponse.estimatedProcessingTime = { seconds: estimatedSeconds, sampleCount: rateResults.rows[0].sample_count || 0, avgRate: rate, calculationMethod: "rate-based" }; console.log("📊 Calculated time estimate using rate-based method:", { rate: rate, promptLength: debugResponse.promptLength, estimatedSeconds: estimatedSeconds, sampleCount: rateResults.rows[0].sample_count }); } else { // Fallback: Calculate a simple estimate based on prompt length (1 second per 1000 characters) const estimatedSeconds = Math.max(15, Math.round(debugResponse.promptLength / 1000)); console.log("📊 No rate data available, using fallback calculation"); debugResponse.estimatedProcessingTime = { seconds: estimatedSeconds, sampleCount: 0, isEstimate: true, calculationMethod: "fallback" }; console.log("📊 Fallback time estimate:", debugResponse.estimatedProcessingTime); } } catch (queryError) { console.error("⚠️ Failed to query performance metrics:", queryError); // Check if table doesn't exist and log a more helpful message if (queryError.code === '42P01') { console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script."); } } } catch (timeEstimateError) { console.error("Error getting time estimate:", timeEstimateError); // Don't fail the request if time estimate fails } } return res.json(debugResponse); } catch (generateError) { console.error("Error generating debug response:", generateError); return res.status(500).json({ error: "Error generating debug response: " + generateError.message, stack: generateError.stack, name: generateError.name, code: generateError.code, sqlMessage: generateError.sqlMessage, }); } } catch (error) { console.error("Debug POST endpoint error:", error); res.status(500).json({ error: error.message, stack: error.stack, code: error.code || null, name: error.name || null }); } finally { try { await closeAllConnections(); } catch (closeError) { console.error("⚠️ Failed to close DB connections after debug request:", closeError); } } }); // Helper function to generate debug response async function generateDebugResponse(productsToUse, res) { let taxonomy = null; let mysqlConnection = null; let ssh = null; try { // Load taxonomy data first console.log("Loading taxonomy data..."); try { // Use optimized database connection const { connection, ssh: connSsh } = await getDbConnection(); mysqlConnection = connection; ssh = connSsh; console.log("MySQL connection established successfully using optimized connection"); taxonomy = await getTaxonomyData(mysqlConnection); console.log("Successfully loaded taxonomy data"); } catch (taxonomyError) { console.error("Failed to load taxonomy data:", taxonomyError); return res.status(500).json({ error: "Error fetching taxonomy data: " + taxonomyError.message, sqlMessage: taxonomyError.sqlMessage || null, sqlState: taxonomyError.sqlState || null, code: taxonomyError.code || null, errno: taxonomyError.errno || null, sql: taxonomyError.sql || null, }); } // Verify the taxonomy data structure console.log("Verifying taxonomy structure..."); if (!taxonomy) { console.error("Taxonomy data is null"); return res.status(500).json({ error: "Taxonomy data is null" }); } // Check if each taxonomy component exists const taxonomyComponents = [ "categories", "themes", "colors", "taxCodes", "sizeCategories", "suppliers", "companies", "artists", "lines", "subLines" ]; const missingComponents = taxonomyComponents.filter(comp => !taxonomy[comp]); if (missingComponents.length > 0) { console.error("Missing taxonomy components:", missingComponents); } // Log detailed taxonomy stats for debugging console.log("Taxonomy data loaded with details:", { categories: { length: taxonomy.categories?.length || 0, sample: taxonomy.categories?.length > 0 ? JSON.stringify(taxonomy.categories[0]).substring(0, 100) + "..." : null }, themes: { length: taxonomy.themes?.length || 0, sample: taxonomy.themes?.length > 0 ? JSON.stringify(taxonomy.themes[0]).substring(0, 100) + "..." : null }, colors: { length: taxonomy.colors?.length || 0, sample: taxonomy.colors?.length > 0 ? JSON.stringify(taxonomy.colors[0]) : null }, taxCodes: { length: taxonomy.taxCodes?.length || 0, sample: taxonomy.taxCodes?.length > 0 ? JSON.stringify(taxonomy.taxCodes[0]) : null }, sizeCategories: { length: taxonomy.sizeCategories?.length || 0, sample: taxonomy.sizeCategories?.length > 0 ? JSON.stringify(taxonomy.sizeCategories[0]) : null }, suppliers: { length: taxonomy.suppliers?.length || 0, sample: taxonomy.suppliers?.length > 0 ? JSON.stringify(taxonomy.suppliers[0]) : null }, companies: { length: taxonomy.companies?.length || 0, sample: taxonomy.companies?.length > 0 ? JSON.stringify(taxonomy.companies[0]) : null }, artists: { length: taxonomy.artists?.length || 0, sample: taxonomy.artists?.length > 0 ? JSON.stringify(taxonomy.artists[0]) : null } }); // Load the prompt using the same function used by validation console.log("Loading prompt..."); // Setup a new connection for loading the prompt // Use optimized connection instead of creating a new one const { connection: promptConnection } = await getDbConnection(); try { // Get the local PostgreSQL pool to fetch prompts const pool = res.app.locals.pool; if (!pool) { console.warn("⚠️ Local database pool not available for prompts"); throw new Error("Database connection not available"); } // First, fetch the system prompt for bulk validation const systemPromptResult = await pool.query(` SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_system' AND company IS NULL `); if (systemPromptResult.rows.length === 0) { console.error("❌ No bulk_validation_system prompt found in database"); throw new Error("Missing required AI prompt: bulk_validation_system. Please add it in Settings > AI Validation Prompts."); } const systemPrompt = systemPromptResult.rows[0]; console.log("📝 Loaded bulk_validation_system prompt from database, ID:", systemPrompt.id); // Then, fetch the general prompt for bulk validation const generalPromptResult = await pool.query(` SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_general' AND company IS NULL `); if (generalPromptResult.rows.length === 0) { console.error("❌ No bulk_validation_general prompt found in database"); throw new Error("Missing required AI prompt: bulk_validation_general. Please add it in Settings > AI Validation Prompts."); } // Get the general prompt text and info const generalPrompt = generalPromptResult.rows[0]; console.log("📝 Loaded bulk_validation_general prompt from database, ID:", generalPrompt.id); // Fetch company-specific prompts if we have products to validate let companyPrompts = []; if (productsToUse && Array.isArray(productsToUse)) { // Extract unique company IDs from products const companyIds = new Set(); productsToUse.forEach(product => { if (product.company) { companyIds.add(String(product.company)); } }); if (companyIds.size > 0) { console.log(`🔍 Found ${companyIds.size} unique companies in products:`, Array.from(companyIds)); // Fetch company-specific prompts for bulk validation const companyPromptsResult = await pool.query(` SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_company_specific' AND company = ANY($1) `, [Array.from(companyIds)]); companyPrompts = companyPromptsResult.rows; console.log(`📝 Loaded ${companyPrompts.length} bulk_validation_company_specific prompts`); } } // Find company names from taxonomy for the validation endpoint const companyPromptsWithNames = companyPrompts.map(prompt => { let companyName = "Unknown Company"; if (taxonomy.companies && Array.isArray(taxonomy.companies)) { const companyData = taxonomy.companies.find(company => String(company[0]) === String(prompt.company) ); if (companyData && companyData[1]) { companyName = companyData[1]; } } return { id: prompt.id, company: prompt.company, companyName: companyName, prompt_text: prompt.prompt_text }; }); // Now use loadPrompt to get the actual combined prompt const promptData = await loadPrompt(promptConnection, productsToUse, res.app.locals.pool); const fullUserPrompt = promptData.userContent + "\n" + JSON.stringify(productsToUse); const promptLength = promptData.systemInstructions.length + fullUserPrompt.length; // Store prompt length for performance metrics console.log("📝 Generated prompt length:", promptLength); console.log("📝 System instructions length:", promptData.systemInstructions.length); console.log("📝 User content length:", fullUserPrompt.length); // Format the messages as they would be sent to the API const apiMessages = [ { role: "system", content: promptData.systemInstructions }, { role: "user", content: fullUserPrompt } ]; // Create the response with taxonomy stats let categoriesCount = 0; try { categoriesCount = taxonomy?.categories?.length ? countItems(taxonomy.categories) : 0; } catch (countError) { console.error("Error counting categories:", countError); categoriesCount = taxonomy?.categories?.length || 0; // Fallback to simple length } const response = { taxonomyStats: taxonomy ? { categories: categoriesCount, themes: taxonomy.themes?.length || 0, colors: taxonomy.colors?.length || 0, taxCodes: taxonomy.taxCodes?.length || 0, sizeCategories: taxonomy.sizeCategories?.length || 0, suppliers: taxonomy.suppliers?.length || 0, companies: taxonomy.companies?.length || 0, artists: taxonomy.artists?.length || 0, // Add filtered counts when products are provided filtered: productsToUse ? { suppliers: taxonomy.suppliers?.filter(([id]) => productsToUse.some( (p) => Number(p.supplierid) === Number(id) ) )?.length || 0, companies: taxonomy.companies?.filter(([id]) => productsToUse.some((p) => Number(p.company) === Number(id)) )?.length || 0, artists: taxonomy.artists?.filter(([id]) => productsToUse.some((p) => Number(p.artist) === Number(id)) )?.length || 0, } : null, } : null, basePrompt: systemPrompt.prompt_text + "\n\n" + generalPrompt.prompt_text, sampleFullPrompt: fullUserPrompt, promptLength: promptLength, apiFormat: apiMessages, promptSources: { systemPrompt: { id: systemPrompt.id, prompt_text: systemPrompt.prompt_text }, generalPrompt: { id: generalPrompt.id, prompt_text: generalPrompt.prompt_text }, companyPrompts: companyPromptsWithNames } }; console.log("Sending response with taxonomy stats:", response.taxonomyStats); return response; } catch (promptLoadError) { console.error("Error loading prompt:", promptLoadError); throw promptLoadError; } } catch (error) { console.error("Error generating debug response:", error); return res.status(500).json({ error: error.message, stack: error.stack, sqlMessage: error.sqlMessage || null, sqlState: error.sqlState || null, code: error.code || null, errno: error.errno || null, taxonomyState: taxonomy ? "loaded" : "failed", }); } } // Helper function to count total items in hierarchical structure function countItems(items) { return items.reduce((count, item) => { return ( count + 1 + (item.subcategories ? countItems(item.subcategories) : 0) ); }, 0); } // Function to fetch and format taxonomy data async function getTaxonomyData(connection) { try { console.log("Starting taxonomy data fetch..."); // Fetch categories with hierarchy const [categories] = await connection.query(` SELECT cat_id,name,NULL AS master_cat_id,1 AS level_order FROM product_categories s WHERE type=10 UNION ALL SELECT c.cat_id,c.name,c.master_cat_id,2 AS level_order FROM product_categories c JOIN product_categories s ON c.master_cat_id=s.cat_id WHERE c.type=11 AND s.type=10 UNION ALL SELECT sc.cat_id,sc.name,sc.master_cat_id,3 AS level_order FROM product_categories sc JOIN product_categories c ON sc.master_cat_id=c.cat_id JOIN product_categories s ON c.master_cat_id=s.cat_id WHERE sc.type=12 AND c.type=11 AND s.type=10 UNION ALL SELECT ssc.cat_id,ssc.name,ssc.master_cat_id,4 AS level_order FROM product_categories ssc JOIN product_categories sc ON ssc.master_cat_id=sc.cat_id JOIN product_categories c ON sc.master_cat_id=c.cat_id JOIN product_categories s ON c.master_cat_id=s.cat_id WHERE ssc.type=13 AND sc.type=12 AND c.type=11 AND s.type=10 ORDER BY level_order,cat_id; `); console.log("Categories fetched:", categories.length); // Fetch themes with hierarchy const [themes] = await connection.query(` SELECT t.cat_id,t.name,null as master_cat_id,1 AS level_order FROM product_categories t WHERE t.type=20 UNION ALL SELECT ts.cat_id,ts.name,ts.master_cat_id,2 AS level_order FROM product_categories ts JOIN product_categories t ON ts.master_cat_id=t.cat_id WHERE ts.type=21 AND t.type=20 ORDER BY level_order,name `); console.log("Themes fetched:", themes.length); // Fetch colors const [colors] = await connection.query( `SELECT color, name, hex_color FROM product_color_list ORDER BY \`order\`` ); console.log("Colors fetched:", colors.length); // Fetch tax codes const [taxCodes] = await connection.query( `SELECT tax_code_id, name FROM product_tax_codes ORDER BY name` ); console.log("Tax codes fetched:", taxCodes.length); // Fetch size categories const [sizeCategories] = await connection.query( `SELECT cat_id, name FROM product_categories WHERE type=50 ORDER BY name` ); console.log("Size categories fetched:", sizeCategories.length); // Fetch suppliers const [suppliers] = await connection.query(` SELECT supplierid, companyname as name FROM suppliers WHERE companyname <> '' ORDER BY companyname `); console.log("Suppliers fetched:", suppliers.length); // Fetch companies (type 1) const [companies] = await connection.query(` SELECT cat_id, name FROM product_categories WHERE type = 1 ORDER BY name `); console.log("Companies fetched:", companies.length); // Fetch artists (type 40) const [artists] = await connection.query(` SELECT cat_id, name FROM product_categories WHERE type = 40 ORDER BY name `); console.log("Artists fetched:", artists.length); // Fetch lines (type 2) const [lines] = await connection.query(` SELECT cat_id, name FROM product_categories WHERE type = 2 ORDER BY name `); console.log("Lines fetched:", lines.length); // Fetch sub-lines (type 3) const [subLines] = await connection.query(` SELECT cat_id, name FROM product_categories WHERE type = 3 ORDER BY name `); console.log("Sub-lines fetched:", subLines.length); // Format categories into a hierarchical structure const formatHierarchy = (items, level = 1, parentId = null) => { return items .filter( (item) => item.level_order === level && item.master_cat_id === parentId ) .map((item) => { const children = formatHierarchy(items, level + 1, item.cat_id); return children.length > 0 ? [item.cat_id, item.name, children] : [item.cat_id, item.name]; }); }; // Format themes similarly but with only two levels const formatThemes = (items) => { return items .filter((item) => item.level_order === 1) .map((item) => { const subthemes = items .filter((subitem) => subitem.master_cat_id === item.cat_id) .map((subitem) => [subitem.cat_id, subitem.name]); return subthemes.length > 0 ? [item.cat_id, item.name, subthemes] : [item.cat_id, item.name]; }); }; // Log first item of each taxonomy category to check structure console.log("Sample category:", categories.length > 0 ? categories[0] : "No categories"); console.log("Sample theme:", themes.length > 0 ? themes[0] : "No themes"); console.log("Sample color:", colors.length > 0 ? colors[0] : "No colors"); const formattedData = { categories: formatHierarchy(categories), themes: formatThemes(themes), colors: colors.map((c) => [c.color, c.name, c.hex_color]), taxCodes: (taxCodes || []).map((tc) => [tc.tax_code_id, tc.name]), sizeCategories: (sizeCategories || []).map((sc) => [sc.cat_id, sc.name]), suppliers: suppliers.map((s) => [s.supplierid, s.name]), companies: companies.map((c) => [c.cat_id, c.name]), artists: artists.map((a) => [a.cat_id, a.name]), lines: lines.map((l) => [l.cat_id, l.name]), subLines: subLines.map((sl) => [sl.cat_id, sl.name]), }; // Check the formatted structure console.log("Formatted categories count:", formattedData.categories.length); console.log("Formatted themes count:", formattedData.themes.length); console.log("Formatted colors count:", formattedData.colors.length); return formattedData; } catch (error) { console.error("Error fetching taxonomy data:", error); console.error("Full error details:", { message: error.message, stack: error.stack, code: error.code, errno: error.errno, sqlMessage: error.sqlMessage, sqlState: error.sqlState, sql: error.sql }); // Instead of silently returning empty arrays, throw the error to be handled by the caller throw error; } } // Load prompts from database and inject taxonomy data async function loadPrompt(connection, productsToValidate = null, appPool = null) { try { // Get taxonomy data using the provided MySQL connection const taxonomy = await getTaxonomyData(connection); // Use the provided pool parameter instead of global.app const pool = appPool; if (!pool) { console.warn("⚠️ Local database pool not available for prompts"); throw new Error("Database connection not available"); } // Fetch the system prompt for bulk validation const systemPromptResult = await pool.query(` SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_system' AND company IS NULL `); if (systemPromptResult.rows.length === 0) { console.error("❌ No bulk_validation_system prompt found in database"); throw new Error("Missing required AI prompt: bulk_validation_system. Please add it in Settings > AI Validation Prompts."); } const systemInstructions = systemPromptResult.rows[0].prompt_text; console.log("📝 Loaded bulk_validation_system prompt from database"); // Fetch the general prompt for bulk validation const generalPromptResult = await pool.query(` SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_general' AND company IS NULL `); if (generalPromptResult.rows.length === 0) { console.error("❌ No bulk_validation_general prompt found in database"); throw new Error("Missing required AI prompt: bulk_validation_general. Please add it in Settings > AI Validation Prompts."); } // Get the general prompt text const basePrompt = generalPromptResult.rows[0].prompt_text; console.log("📝 Loaded bulk_validation_general prompt from database"); // Fetch company-specific prompts if we have products to validate let companyPrompts = []; if (productsToValidate && Array.isArray(productsToValidate)) { // Extract unique company IDs from products const companyIds = new Set(); productsToValidate.forEach(product => { if (product.company) { companyIds.add(String(product.company)); } }); if (companyIds.size > 0) { console.log(`🔍 Found ${companyIds.size} unique companies in products:`, Array.from(companyIds)); // Fetch company-specific prompts for bulk validation const companyPromptsResult = await pool.query(` SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_company_specific' AND company = ANY($1) `, [Array.from(companyIds)]); companyPrompts = companyPromptsResult.rows; console.log(`📝 Loaded ${companyPrompts.length} bulk_validation_company_specific prompts`); } } // Combine prompts - start with the general prompt let combinedPrompt = basePrompt; // Add any company-specific prompts with annotations if (companyPrompts.length > 0) { combinedPrompt += "\n\n--- COMPANY-SPECIFIC INSTRUCTIONS ---\n"; for (const prompt of companyPrompts) { // Find company name from taxonomy let companyName = "Unknown Company"; if (taxonomy.companies && Array.isArray(taxonomy.companies)) { const companyData = taxonomy.companies.find(company => String(company[0]) === String(prompt.company) ); if (companyData && companyData[1]) { companyName = companyData[1]; } } combinedPrompt += `\n[SPECIFIC TO COMPANY: ${companyName} (ID: ${prompt.company})]:\n${prompt.prompt_text}\n`; } combinedPrompt += "\n--- END COMPANY-SPECIFIC INSTRUCTIONS ---\n"; } // Products are required for validation if (!productsToValidate || !Array.isArray(productsToValidate) || productsToValidate.length === 0) { throw new Error("Products are required for prompt generation"); } console.log("Creating filtered prompt for products:", productsToValidate); // Extract unique values from products for non-core attributes const uniqueValues = { supplierIds: new Set(), companyIds: new Set(), artistIds: new Set(), lineIds: new Set(), subLineIds: new Set(), }; // Collect any values that exist in the products productsToValidate.forEach((product) => { Object.entries(product).forEach(([key, value]) => { if (value === undefined || value === null) return; // Map field names to their respective sets const fieldMap = { supplierid: "supplierIds", supplier: "supplierIds", company: "companyIds", artist: "artistIds", line: "lineIds", subline: "subLineIds", }; if (fieldMap[key]) { uniqueValues[fieldMap[key]].add(Number(value)); } }); }); console.log("Unique values collected:", { suppliers: Array.from(uniqueValues.supplierIds), companies: Array.from(uniqueValues.companyIds), artists: Array.from(uniqueValues.artistIds), lines: Array.from(uniqueValues.lineIds), subLines: Array.from(uniqueValues.subLineIds), }); // Create mixed taxonomy with filtered non-core data and full core data const mixedTaxonomy = { // Keep full data for core attributes categories: taxonomy.categories, themes: taxonomy.themes, colors: taxonomy.colors, taxCodes: taxonomy.taxCodes, sizeCategories: taxonomy.sizeCategories, // For non-core data, only include items that are actually used suppliers: taxonomy.suppliers.filter(([id]) => uniqueValues.supplierIds.has(Number(id)) ), companies: taxonomy.companies.filter(([id]) => uniqueValues.companyIds.has(Number(id)) ), artists: taxonomy.artists.filter(([id]) => uniqueValues.artistIds.has(Number(id)) ), lines: taxonomy.lines.filter(([id]) => uniqueValues.lineIds.has(Number(id)) ), subLines: taxonomy.subLines.filter(([id]) => uniqueValues.subLineIds.has(Number(id)) ), }; console.log("Filtered taxonomy counts:", { suppliers: mixedTaxonomy.suppliers.length, companies: mixedTaxonomy.companies.length, artists: mixedTaxonomy.artists.length, lines: mixedTaxonomy.lines.length, subLines: mixedTaxonomy.subLines.length, }); // Format taxonomy data for the prompt, only including sections with values const taxonomySection = ` All Available Categories: ${JSON.stringify(mixedTaxonomy.categories)} All Available Themes: ${JSON.stringify(mixedTaxonomy.themes)} All Available Colors: ${JSON.stringify(mixedTaxonomy.colors)} All Available Tax Codes: ${JSON.stringify(mixedTaxonomy.taxCodes)} All Available Size Categories: ${JSON.stringify(mixedTaxonomy.sizeCategories)}${ mixedTaxonomy.suppliers.length ? `\n\nSuppliers Used In This Data:\n${JSON.stringify( mixedTaxonomy.suppliers )}` : "" }${ mixedTaxonomy.companies.length ? `\n\nCompanies Used In This Data:\n${JSON.stringify( mixedTaxonomy.companies )}` : "" }${ mixedTaxonomy.artists.length ? `\n\nArtists Used In This Data:\n${JSON.stringify( mixedTaxonomy.artists )}` : "" }${ mixedTaxonomy.lines.length ? `\n\nLines Used In This Data:\n${JSON.stringify( mixedTaxonomy.lines )}` : "" }${ mixedTaxonomy.subLines.length ? `\n\nSub-Lines Used In This Data:\n${JSON.stringify( mixedTaxonomy.subLines )}` : "" } ----------Here is the product data to validate----------`; // Return both system instructions and user content separately return { systemInstructions, userContent: combinedPrompt + "\n" + taxonomySection }; } catch (error) { console.error("Error loading prompt:", error); throw error; // Re-throw to be handled by the calling function } } router.post("/validate", async (req, res) => { try { const { products } = req.body; const startTime = new Date(); // Track start time for performance metrics console.log("🔍 Received products for validation:", { isArray: Array.isArray(products), length: products?.length, firstProduct: products?.[0], lastProduct: products?.[products?.length - 1], }); if (!Array.isArray(products)) { console.error("❌ Invalid input: products is not an array"); return res.status(400).json({ error: "Products must be an array" }); } if (products.length === 0) { console.error("❌ Invalid input: products array is empty"); return res.status(400).json({ error: "Products array cannot be empty" }); } let ssh = null; let connection = null; let promptLength = 0; // Track prompt length for performance metrics try { // Use the optimized connection utility instead of direct SSH tunnel console.log("🔄 Setting up connection to production database using optimized connection..."); const { ssh: connSsh, connection: connDB } = await getDbConnection(); ssh = connSsh; connection = connDB; console.log("🔄 MySQL connection established successfully"); // Load the prompt with the products data to filter taxonomy console.log("🔄 Loading prompt with filtered taxonomy..."); const promptData = await loadPrompt(connection, products, req.app.locals.pool); const fullUserPrompt = promptData.userContent + "\n" + JSON.stringify(products); promptLength = promptData.systemInstructions.length + fullUserPrompt.length; // Store prompt length for performance metrics console.log("📝 Generated prompt length:", promptLength); console.log("📝 System instructions length:", promptData.systemInstructions.length); console.log("📝 User content length:", fullUserPrompt.length); console.log("🤖 Sending request to OpenAI Responses API..."); // GPT-5 Responses API Configuration: // - Using "gpt-5" (reasoning model) for complex product validation // - reasoning.effort: "medium" balances quality and speed (minimal, low, medium, high) // - text.verbosity: "medium" provides balanced output detail (low, medium, high) // - max_output_tokens: 20000 ensures space for large product batches // Note: Responses API is the recommended endpoint for GPT-5 models const completion = await createResponsesCompletion({ model: "gpt-5.2", input: [ { role: "developer", content: promptData.systemInstructions, }, { role: "user", content: fullUserPrompt, }, ], reasoning: { effort: "medium" }, text: { verbosity: "medium", format: AI_VALIDATION_TEXT_FORMAT, }, max_output_tokens: 50000, }); console.log("✅ Received response from OpenAI Responses API"); // Responses API structure: response has 'output' array with message objects const rawResponse = extractResponseText(completion); console.log("📄 Raw AI response length:", rawResponse ? rawResponse.length : 0); if (!rawResponse) { throw new Error("OpenAI response did not include any text output"); } const responseModel = completion.model; const usage = completion.usage || {}; // GPT-5 Responses API provides detailed token usage including reasoning tokens const tokenUsageSummary = { prompt: usage.input_tokens ?? usage.prompt_tokens ?? null, completion: usage.output_tokens ?? usage.completion_tokens ?? null, total: usage.total_tokens ?? null, // GPT-5 reasoning tokens are in output_tokens_details reasoning: usage.output_tokens_details?.reasoning_tokens ?? usage.completion_tokens_details?.reasoning_tokens ?? null, // Also capture text generation tokens separately from reasoning textGeneration: usage.output_tokens_details?.text_generation_tokens ?? usage.completion_tokens_details?.text_generation_tokens ?? null, cachedPrompt: usage.input_tokens_details?.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? null, // Capture audio tokens if present (future GPT-5 feature) audioTokens: usage.output_tokens_details?.audio_tokens ?? usage.completion_tokens_details?.audio_tokens ?? null, }; // Extract reasoning_effort and verbosity that were actually applied const reasoningEffortApplied = completion.reasoning?.effort || "medium"; const verbosityApplied = completion.text?.verbosity || "medium"; console.log("📊 Token usage summary:", tokenUsageSummary); console.log("🤖 Model dispatched:", responseModel); console.log("🧠 Reasoning effort applied:", reasoningEffortApplied); console.log("📝 Verbosity applied:", verbosityApplied); try { const normalizedResponse = normalizeJsonResponse(rawResponse); const aiResponse = JSON.parse(normalizedResponse); console.log( "🔄 Parsed AI response with keys:", Object.keys(aiResponse) ); // Merge AI changes back into original products // AI now only returns changed products and changed fields const mergedProducts = products.map((original, index) => ({ ...original })); const changeDetails = []; if (aiResponse.correctedData && Array.isArray(aiResponse.correctedData)) { console.log("📊 Processing AI changes - received", aiResponse.correctedData.length, "products with changes"); // Process each changed product from AI aiResponse.correctedData.forEach((changedProduct) => { // Find the matching original product using stable identifiers in priority order // Priority: upc > supplier_no > notions_no // These fields should not change during validation const identifiers = ['upc', 'supplier_no', 'notions_no']; let matchedIndex = -1; let matchedBy = null; for (const identifier of identifiers) { if (changedProduct[identifier] !== undefined && changedProduct[identifier] !== null && changedProduct[identifier] !== '') { matchedIndex = products.findIndex( (p) => p[identifier] !== undefined && p[identifier] !== null && p[identifier] !== '' && String(p[identifier]).trim() === String(changedProduct[identifier]).trim() ); if (matchedIndex !== -1) { matchedBy = identifier; console.log(`✓ Matched product by ${identifier}:`, changedProduct[identifier]); break; } } } // If no identifier match found, log an error with details if (matchedIndex === -1) { console.error("❌ Could not match changed product to original. Product identifiers:", { upc: changedProduct.upc, supplier_no: changedProduct.supplier_no, notions_no: changedProduct.notions_no }); return; } const original = products[matchedIndex]; const productChanges = { productIndex: matchedIndex, title: original.name || original.title || `Product ${matchedIndex + 1}`, changes: [] }; // Apply each changed field to the merged product Object.keys(changedProduct).forEach((key) => { // Check if the value actually changed if (JSON.stringify(original[key]) !== JSON.stringify(changedProduct[key])) { console.log(`\nProduct ${matchedIndex + 1} - Field ${key}:`); console.log(` - Original: ${JSON.stringify(original[key])}`); console.log(` - Corrected: ${JSON.stringify(changedProduct[key])}`); // Apply the change to merged product mergedProducts[matchedIndex][key] = changedProduct[key]; // Track the change productChanges.changes.push({ field: key, original: original[key], corrected: changedProduct[key] }); } }); // Only add to changeDetails if there were actual changes if (productChanges.changes.length > 0) { changeDetails.push(productChanges); } }); console.log(`📊 Applied changes to ${changeDetails.length} products`); } // Replace aiResponse.correctedData with the fully merged product array aiResponse.correctedData = mergedProducts; // Record performance metrics after successful validation const endTime = new Date(); let performanceMetrics = { promptLength, productCount: products.length, model: responseModel, tokenUsage: tokenUsageSummary, reasoningTokens: tokenUsageSummary.reasoning, reasoningEffort: reasoningEffortApplied, verbosity: verbosityApplied, }; try { // Use the local PostgreSQL pool from the app instead of the MySQL connection const pool = req.app.locals.pool; if (!pool) { console.warn("⚠️ Local database pool not available for recording metrics"); return; } try { // Insert performance data into the local PostgreSQL database await pool.query( `INSERT INTO ai_validation_performance (prompt_length, product_count, start_time, end_time) VALUES ($1, $2, $3, $4)`, [ promptLength, products.length, startTime.toISOString(), endTime.toISOString() ] ); console.log("📊 Performance metrics inserted into database"); // Query for average processing time based on similar prompt lengths try { const rateResults = await pool.query( `SELECT AVG(duration_seconds / prompt_length) as avg_rate_per_char, COUNT(*) as sample_count, AVG(duration_seconds) as avg_duration FROM ai_validation_performance` ); if (rateResults.rows && rateResults.rows[0] && rateResults.rows[0].avg_rate_per_char) { const rate = rateResults.rows[0].avg_rate_per_char; performanceMetrics.avgRate = rate; performanceMetrics.estimatedSeconds = Math.round(rate * promptLength); performanceMetrics.sampleCount = rateResults.rows[0].sample_count; performanceMetrics.calculationMethod = "rate-based"; } console.log("📊 Performance metrics with rate calculation:", performanceMetrics); } catch (queryError) { console.error("⚠️ Failed to query performance metrics:", queryError); } } catch (insertError) { console.error("⚠️ Failed to insert performance metrics:", insertError); // Check if table doesn't exist and log a more helpful message if (insertError.code === '42P01') { console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script."); } } } catch (metricError) { // Don't fail the request if metrics recording fails console.error("⚠️ Failed to record performance metrics:", metricError); } // Get sources of the prompts for tracking let promptSources = null; try { // Use the local PostgreSQL pool from the app const pool = req.app.locals.pool; if (!pool) { console.warn("⚠️ Local database pool not available for prompt sources"); } else { // Get system prompt for bulk validation const systemPromptResult = await pool.query(` SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_system' AND company IS NULL `); // Get general prompt for bulk validation const generalPromptResult = await pool.query(` SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_general' AND company IS NULL `); // Extract unique company IDs from products const companyIds = new Set(); products.forEach(product => { if (product.company) { companyIds.add(String(product.company)); } }); let companyPrompts = []; if (companyIds.size > 0) { // Fetch company-specific prompts for bulk validation const companyPromptsResult = await pool.query(` SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_company_specific' AND company = ANY($1) `, [Array.from(companyIds)]); companyPrompts = companyPromptsResult.rows; } // Format company prompts for response // Note: Company names would require re-fetching taxonomy data // For now, we include company ID only const companyPromptsWithNames = companyPrompts.map(prompt => ({ id: prompt.id, company: prompt.company, prompt_text: prompt.prompt_text })); // Set prompt sources if (generalPromptResult.rows.length > 0 && systemPromptResult.rows.length > 0) { const generalPrompt = generalPromptResult.rows[0]; const systemPrompt = systemPromptResult.rows[0]; promptSources = { systemPrompt: { id: systemPrompt.id, prompt_text: systemPrompt.prompt_text }, generalPrompt: { id: generalPrompt.id, prompt_text: generalPrompt.prompt_text }, companyPrompts: companyPromptsWithNames }; } } } catch (promptSourceError) { console.error("⚠️ Error getting prompt sources:", promptSourceError); // Don't fail the entire validation if just prompt sources retrieval fails } // Include prompt sources in the response res.json({ success: true, ...aiResponse, changeDetails, performanceMetrics: performanceMetrics || { // Fallback: calculate a simple estimate promptLength, processingTimeSeconds: Math.max(15, Math.round(promptLength / 1000)), isEstimate: true, productCount: products.length, model: responseModel, tokenUsage: tokenUsageSummary, reasoningTokens: tokenUsageSummary.reasoning, reasoningEffort: reasoningEffortApplied, verbosity: verbosityApplied, }, promptSources, model: responseModel, tokenUsage: tokenUsageSummary, reasoningEffort: reasoningEffortApplied, verbosity: verbosityApplied, }); } catch (parseError) { console.error("❌ Error parsing AI response:", parseError); console.error("Raw response that failed to parse:", rawResponse); res.status(500).json({ success: false, error: "Error parsing AI response: " + parseError.message, }); } } catch (openaiError) { console.error("❌ OpenAI API Error:", openaiError); res.status(500).json({ success: false, error: "OpenAI API Error: " + openaiError.message, }); } } catch (error) { console.error("❌ AI Validation Error:", error); console.error("Error details:", { name: error.name, message: error.message, stack: error.stack, }); res.status(500).json({ success: false, error: error.message || "Error during AI validation", }); } finally { try { await closeAllConnections(); } catch (closeError) { console.error("⚠️ Failed to close DB connections after validation request:", closeError); } } }); // Test endpoint for direct database query of taxonomy data router.get("/test-taxonomy", async (req, res) => { try { console.log("Test taxonomy endpoint called"); let ssh = null; let connection = null; try { // Use the optimized connection utility instead of direct SSH tunnel console.log("🔄 Setting up connection to production database using optimized connection..."); const { ssh: connSsh, connection: connDB } = await getDbConnection(); ssh = connSsh; connection = connDB; console.log("MySQL connection established successfully for test"); const results = {}; // Test categories query try { const [categories] = await connection.query(` SELECT cat_id, name FROM product_categories WHERE type=10 LIMIT 5 `); results.categories = { success: true, count: categories.length, sample: categories.length > 0 ? categories[0] : null }; } catch (error) { results.categories = { success: false, error: error.message, sqlMessage: error.sqlMessage }; } // Test themes query try { const [themes] = await connection.query(` SELECT cat_id, name FROM product_categories WHERE type=20 LIMIT 5 `); results.themes = { success: true, count: themes.length, sample: themes.length > 0 ? themes[0] : null }; } catch (error) { results.themes = { success: false, error: error.message, sqlMessage: error.sqlMessage }; } // Test colors query try { const [colors] = await connection.query(` SELECT color, name, hex_color FROM product_color_list ORDER BY \`order\` LIMIT 5 `); results.colors = { success: true, count: colors.length, sample: colors.length > 0 ? colors[0] : null }; } catch (error) { results.colors = { success: false, error: error.message, sqlMessage: error.sqlMessage }; } return res.json({ message: "Test taxonomy queries executed", results: results, timestamp: new Date().toISOString() }); } finally { try { await closeAllConnections(); } catch (closeError) { console.error("⚠️ Failed to close DB connections after test-taxonomy request:", closeError); } } } catch (error) { console.error("Test taxonomy endpoint error:", error); return res.status(500).json({ error: error.message, stack: error.stack }); } }); module.exports = router; function extractResponseText(response) { if (!response) return ""; const outputs = []; if (Array.isArray(response.output)) { outputs.push(...response.output); } if (Array.isArray(response.outputs)) { outputs.push(...response.outputs); } const segments = outputs.flatMap((output) => collectTextSegments(output?.content ?? output)); if (segments.length === 0 && typeof response.output_text === "string") { segments.push(response.output_text); } if (segments.length === 0 && response.choices?.length) { segments.push( ...collectTextSegments(response.choices?.[0]?.message?.content) ); } const text = segments.join("").trim(); return text; } function collectTextSegments(node) { if (node == null) return []; if (typeof node === "string" || typeof node === "number" || typeof node === "boolean") { return [String(node)]; } if (Array.isArray(node)) { return node.flatMap(collectTextSegments); } if (typeof node !== "object") { return []; } const segments = []; if (typeof node.text === "string") { segments.push(node.text); } else if (Array.isArray(node.text)) { segments.push(...node.text.flatMap(collectTextSegments)); } if (typeof node.content === "string") { segments.push(node.content); } else if (Array.isArray(node.content)) { segments.push(...node.content.flatMap(collectTextSegments)); } if (typeof node.output_text === "string") { segments.push(node.output_text); } else if (Array.isArray(node.output_text)) { segments.push(...node.output_text.flatMap(collectTextSegments)); } if (typeof node.value === "string") { segments.push(node.value); } if (typeof node.data === "string") { segments.push(node.data); } return segments; } function normalizeJsonResponse(text) { if (!text || typeof text !== 'string') return text; let cleaned = text.trim(); // Remove markdown code fences if present if (cleaned.startsWith('```')) { const firstLineBreak = cleaned.indexOf('\n'); if (firstLineBreak !== -1) { cleaned = cleaned.substring(firstLineBreak + 1); } else { cleaned = cleaned.replace(/^```/, ''); } const closingFenceIndex = cleaned.lastIndexOf('```'); if (closingFenceIndex !== -1) { cleaned = cleaned.substring(0, closingFenceIndex); } cleaned = cleaned.trim(); } // Attempt to repair truncated JSON // This handles cases where the AI response was cut off mid-response cleaned = repairTruncatedJson(cleaned); return cleaned; } /** * Attempt to repair truncated JSON by adding missing closing brackets/braces * This is a common issue when AI responses hit token limits */ function repairTruncatedJson(text) { if (!text || typeof text !== 'string') return text; // First, try parsing as-is try { JSON.parse(text); return text; // Valid JSON, no repair needed } catch (e) { // JSON is invalid, try to repair } let repaired = text.trim(); // Count opening and closing brackets/braces let braceCount = 0; // {} let bracketCount = 0; // [] let inString = false; let escapeNext = false; for (let i = 0; i < repaired.length; i++) { const char = repaired[i]; if (escapeNext) { escapeNext = false; continue; } if (char === '\\' && inString) { escapeNext = true; continue; } if (char === '"') { inString = !inString; continue; } if (!inString) { if (char === '{') braceCount++; else if (char === '}') braceCount--; else if (char === '[') bracketCount++; else if (char === ']') bracketCount--; } } // If we're still inside a string, close it if (inString) { repaired += '"'; } // Add missing closing brackets and braces // Close arrays first, then objects (reverse of typical nesting) while (bracketCount > 0) { repaired += ']'; bracketCount--; } while (braceCount > 0) { repaired += '}'; braceCount--; } // Try parsing the repaired JSON try { JSON.parse(repaired); console.log('✅ Successfully repaired truncated JSON'); return repaired; } catch (e) { // Repair failed, return original and let the caller handle the error console.log('⚠️ JSON repair attempt failed:', e.message); return text; } }