Files
inventory/inventory-server/src/routes/ai-validation.js

1583 lines
56 KiB
JavaScript

const express = require("express");
const router = express.Router();
const OpenAI = require("openai");
const fs = require("fs").promises;
const path = require("path");
const dotenv = require("dotenv");
const mysql = require('mysql2/promise');
const { Client } = require('ssh2');
const { getDbConnection, closeAllConnections } = require('../utils/dbConnection'); // Import the optimized connection function
// Ensure environment variables are loaded before anything reads process.env
dotenv.config({ path: path.join(__dirname, "../../.env") });

// Emit the diagnostic BEFORE constructing the client. Recent versions of the
// openai SDK throw from the constructor when no API key is available, which
// previously made this warning unreachable in exactly the case it was meant
// for. NOTE(review): confirm the constructor behavior for the pinned SDK version.
if (!process.env.OPENAI_API_KEY) {
  console.error("Warning: OPENAI_API_KEY is not set in environment variables");
}

// Shared OpenAI client used by every route in this module.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Forwards a request to the OpenAI Responses API through the module-level client.
 *
 * @param {object} payload - Request body handed to `responses.create` unchanged.
 * @returns {Promise<object>} Whatever `responses.create` resolves to.
 * @throws {Error} When the client has no `responses.create` member.
 */
async function createResponsesCompletion(payload) {
  const responsesApi = openai.responses;
  if (responsesApi?.create) {
    // Invoke through the owning object so `this` stays bound to it.
    return responsesApi.create(payload);
  }
  throw new Error(
    "OpenAI client does not expose responses.create; please verify the openai SDK version."
  );
}
// ---------------------------------------------------------------------------
// JSON schema describing the structured output requested from the model.
//
// NOTE(review): AI_VALIDATION_TEXT_FORMAT sets `strict: true`. OpenAI's
// structured-output documentation lists restrictions on which JSON Schema
// keywords strict mode accepts; confirm that `patternProperties` and
// `default` are accepted for the model in use.
// ---------------------------------------------------------------------------
const AI_VALIDATION_SCHEMA_NAME = "ai_validation_response";

/** Object schema with no fixed keys whose every property must match `valueSchema`. */
const makeOpenRecordSchema = (valueSchema) => ({
  type: "object",
  properties: {},
  patternProperties: {
    ".+": valueSchema,
  },
  additionalProperties: false,
});

/** Array-of-strings schema defaulting to an empty list. */
const makeStringListSchema = () => ({
  type: "array",
  items: { type: "string" },
  default: [],
});

// Scalar alternatives accepted for any free-form value.
const FLEXIBLE_PRIMITIVE_SCHEMAS = ["string", "number", "boolean", "null"].map(
  (type) => ({ type })
);

// Flat array of scalars.
const FLEXIBLE_ARRAY_SCHEMA = {
  type: "array",
  items: { anyOf: FLEXIBLE_PRIMITIVE_SCHEMAS },
};

// One level of nesting: an object whose values are scalars or scalar arrays.
const FLEXIBLE_OBJECT_SCHEMA = makeOpenRecordSchema({
  anyOf: [...FLEXIBLE_PRIMITIVE_SCHEMAS, FLEXIBLE_ARRAY_SCHEMA],
});

// Anything a product field or metadata entry may hold.
const FLEXIBLE_VALUE_SCHEMA = {
  anyOf: [
    ...FLEXIBLE_PRIMITIVE_SCHEMAS,
    FLEXIBLE_ARRAY_SCHEMA,
    FLEXIBLE_OBJECT_SCHEMA,
  ],
};

// Top-level response envelope; all five keys are required.
const AI_VALIDATION_JSON_SCHEMA = {
  type: "object",
  additionalProperties: false,
  required: ["correctedData", "changes", "warnings", "summary", "metadata"],
  properties: {
    correctedData: {
      type: "array",
      items: makeOpenRecordSchema(FLEXIBLE_VALUE_SCHEMA),
    },
    changes: makeStringListSchema(),
    warnings: makeStringListSchema(),
    summary: {
      type: "string",
      default: "",
    },
    metadata: makeOpenRecordSchema(FLEXIBLE_VALUE_SCHEMA),
  },
};

// Value passed as `text.format` on Responses API requests.
const AI_VALIDATION_TEXT_FORMAT = {
  type: "json_schema",
  name: AI_VALIDATION_SCHEMA_NAME,
  strict: true,
  schema: AI_VALIDATION_JSON_SCHEMA,
};
// Debug endpoint for viewing prompt
/**
 * POST /debug
 *
 * Builds the prompt that would be sent for `req.body.products` (via
 * generateDebugResponse) and returns it as JSON, optionally decorated with an
 * `estimatedProcessingTime` derived from the `ai_validation_performance` table.
 *
 * Responses:
 *   400 - `products` is missing, not an array, or empty.
 *   500 - prompt generation failed.
 *   200 - debug payload; the time estimate is best-effort and is simply
 *         omitted when it cannot be computed.
 *
 * Exactly one response is written per request: if the helper already replied
 * (res.headersSent), this handler does not try to send again.
 */
router.post("/debug", async (req, res) => {
  try {
    console.log("Debug POST endpoint called");
    // Tolerate a missing body so the caller gets a 400 rather than a 500.
    const { products } = req.body ?? {};
    console.log("Received products for debug:", {
      isArray: Array.isArray(products),
      length: products?.length,
      firstProduct: products?.[0],
      lastProduct: products?.[products?.length - 1],
    });
    if (!Array.isArray(products)) {
      console.error("Invalid input: products is not an array");
      return res.status(400).json({ error: "Products must be an array" });
    }
    if (products.length === 0) {
      console.error("Invalid input: products array is empty");
      return res.status(400).json({ error: "Products array cannot be empty" });
    }

    // Clean the products array to remove any internal fields
    const cleanedProducts = products.map((product) => {
      const { __errors, __index, ...cleanProduct } = product;
      return cleanProduct;
    });
    console.log("Processing debug request with cleaned products:", {
      length: cleanedProducts.length,
      sample: cleanedProducts[0],
    });

    try {
      const debugResponse = await generateDebugResponse(cleanedProducts, res);

      // The helper may already have written an error response; sending again
      // would raise ERR_HTTP_HEADERS_SENT.
      if (res.headersSent) {
        return;
      }

      // Get estimated processing time based on prompt length (best-effort).
      if (debugResponse && debugResponse.promptLength) {
        try {
          const pool = req.app.locals.pool;
          if (!pool) {
            // Skip the estimate only; the debug payload is still returned below.
            console.warn("⚠️ Local database pool not available for time estimates");
          } else {
            // Average seconds-per-character over all recorded runs. NULLIF keeps
            // a zero-length row from aborting the query with a division error.
            const rateResults = await pool.query(
              `SELECT
                 AVG(duration_seconds / NULLIF(prompt_length, 0)) as avg_rate_per_char,
                 COUNT(*) as sample_count,
                 AVG(duration_seconds) as avg_duration
               FROM ai_validation_performance`
            );
            const stats = rateResults.rows?.[0];
            if (stats && stats.avg_rate_per_char) {
              // Scale the observed rate by this prompt's length; never report under 15s.
              const rate = stats.avg_rate_per_char;
              const estimatedSeconds = Math.max(15, Math.round(rate * debugResponse.promptLength));
              debugResponse.estimatedProcessingTime = {
                seconds: estimatedSeconds,
                sampleCount: stats.sample_count || 0,
                avgRate: rate,
                calculationMethod: "rate-based"
              };
              console.log("📊 Calculated time estimate using rate-based method:", {
                rate: rate,
                promptLength: debugResponse.promptLength,
                estimatedSeconds: estimatedSeconds,
                sampleCount: stats.sample_count
              });
            } else {
              // Fallback: 1 second per 1000 characters, with the same 15s floor.
              const estimatedSeconds = Math.max(15, Math.round(debugResponse.promptLength / 1000));
              console.log("📊 No rate data available, using fallback calculation");
              debugResponse.estimatedProcessingTime = {
                seconds: estimatedSeconds,
                sampleCount: 0,
                isEstimate: true,
                calculationMethod: "fallback"
              };
              console.log("📊 Fallback time estimate:", debugResponse.estimatedProcessingTime);
            }
          }
        } catch (queryError) {
          // Don't fail the request if the time estimate fails.
          console.error("⚠️ Failed to query performance metrics:", queryError);
          // 42P01 is PostgreSQL's undefined_table error code.
          if (queryError.code === '42P01') {
            console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script.");
          }
        }
      }

      return res.json(debugResponse);
    } catch (generateError) {
      console.error("Error generating debug response:", generateError);
      if (res.headersSent) {
        return;
      }
      return res.status(500).json({
        error: "Error generating debug response: " + generateError.message,
        stack: generateError.stack,
        name: generateError.name,
        code: generateError.code,
        sqlMessage: generateError.sqlMessage,
      });
    }
  } catch (error) {
    console.error("Debug POST endpoint error:", error);
    if (!res.headersSent) {
      res.status(500).json({
        error: error.message,
        stack: error.stack,
        code: error.code || null,
        name: error.name || null
      });
    }
  } finally {
    // Always release DB connections, including after early returns.
    try {
      await closeAllConnections();
    } catch (closeError) {
      console.error("⚠️ Failed to close DB connections after debug request:", closeError);
    }
  }
});
// Helper function to generate debug response
/**
 * Assembles the payload returned by the /debug endpoint: taxonomy statistics,
 * the prompts that were combined, and the exact messages that would be sent
 * to the model for `productsToUse`.
 *
 * This helper never writes to `res`. Every failure is thrown so the route can
 * send exactly one response; previously the helper sent a 500 itself and
 * returned `res`, which the route then tried to send a second time.
 *
 * @param {object[]} productsToUse - Cleaned products from the request.
 * @param {object} res - Express response; read only for `res.app.locals.pool`.
 * @returns {Promise<object>} `{ taxonomyStats, basePrompt, sampleFullPrompt,
 *   promptLength, apiFormat, promptSources }`.
 * @throws {Error} On taxonomy/prompt loading failures. Taxonomy errors are
 *   re-wrapped with an "Error fetching taxonomy data: " prefix and carry the
 *   original sqlMessage/sqlState/code/errno/sql fields.
 */
async function generateDebugResponse(productsToUse, res) {
  let taxonomy = null;
  try {
    // Load taxonomy data first
    console.log("Loading taxonomy data...");
    let mysqlConnection = null;
    try {
      // Use optimized database connection
      const { connection } = await getDbConnection();
      mysqlConnection = connection;
      console.log("MySQL connection established successfully using optimized connection");
      taxonomy = await getTaxonomyData(mysqlConnection);
      console.log("Successfully loaded taxonomy data");
    } catch (taxonomyError) {
      console.error("Failed to load taxonomy data:", taxonomyError);
      const wrapped = new Error(
        "Error fetching taxonomy data: " + taxonomyError.message,
        { cause: taxonomyError }
      );
      // Preserve driver diagnostics so the route can report them.
      for (const field of ["sqlMessage", "sqlState", "code", "errno", "sql"]) {
        wrapped[field] = taxonomyError[field] ?? null;
      }
      throw wrapped;
    }

    // Verify the taxonomy data structure
    console.log("Verifying taxonomy structure...");
    if (!taxonomy) {
      console.error("Taxonomy data is null");
      throw new Error("Taxonomy data is null");
    }

    // Check if each taxonomy component exists (logged only, not fatal)
    const taxonomyComponents = [
      "categories", "themes", "colors", "taxCodes", "sizeCategories",
      "suppliers", "companies", "artists", "lines", "subLines"
    ];
    const missingComponents = taxonomyComponents.filter((comp) => !taxonomy[comp]);
    if (missingComponents.length > 0) {
      console.error("Missing taxonomy components:", missingComponents);
    }

    // Log detailed taxonomy stats for debugging. Categories/themes can be
    // large nested structures, so their samples are truncated to 100 chars.
    const describe = (list, truncate = false) => {
      const hasItems = list?.length > 0;
      const serialized = hasItems ? JSON.stringify(list[0]) : null;
      return {
        length: list?.length || 0,
        sample: hasItems && truncate ? serialized.substring(0, 100) + "..." : serialized
      };
    };
    console.log("Taxonomy data loaded with details:", {
      categories: describe(taxonomy.categories, true),
      themes: describe(taxonomy.themes, true),
      colors: describe(taxonomy.colors),
      taxCodes: describe(taxonomy.taxCodes),
      sizeCategories: describe(taxonomy.sizeCategories),
      suppliers: describe(taxonomy.suppliers),
      companies: describe(taxonomy.companies),
      artists: describe(taxonomy.artists)
    });

    // Load the prompt using the same function used by validation
    console.log("Loading prompt...");

    // Get the local PostgreSQL pool to fetch prompts
    const pool = res.app.locals.pool;
    if (!pool) {
      console.warn("⚠️ Local database pool not available for prompts");
      throw new Error("Database connection not available");
    }

    // Fetches the single global (company IS NULL) prompt row of a given type.
    const fetchRequiredPrompt = async (promptType) => {
      const result = await pool.query(
        `SELECT * FROM ai_prompts
         WHERE prompt_type = $1 AND company IS NULL`,
        [promptType]
      );
      if (result.rows.length === 0) {
        console.error(`❌ No ${promptType} prompt found in database`);
        throw new Error(`Missing required AI prompt: ${promptType}. Please add it in Settings > AI Validation Prompts.`);
      }
      console.log(`📝 Loaded ${promptType} prompt from database, ID:`, result.rows[0].id);
      return result.rows[0];
    };

    // First the system prompt, then the general prompt for bulk validation
    const systemPrompt = await fetchRequiredPrompt("bulk_validation_system");
    const generalPrompt = await fetchRequiredPrompt("bulk_validation_general");

    // Fetch company-specific prompts if we have products to validate
    let companyPrompts = [];
    if (productsToUse && Array.isArray(productsToUse)) {
      // Extract unique company IDs from products
      const companyIds = new Set();
      for (const product of productsToUse) {
        if (product.company) {
          companyIds.add(String(product.company));
        }
      }
      if (companyIds.size > 0) {
        console.log(`🔍 Found ${companyIds.size} unique companies in products:`, Array.from(companyIds));
        const companyPromptsResult = await pool.query(
          `SELECT * FROM ai_prompts
           WHERE prompt_type = 'bulk_validation_company_specific'
           AND company = ANY($1)`,
          [Array.from(companyIds)]
        );
        companyPrompts = companyPromptsResult.rows;
        console.log(`📝 Loaded ${companyPrompts.length} bulk_validation_company_specific prompts`);
      }
    }

    // Resolve company names from taxonomy ([id, name] tuples) for display
    const companyPromptsWithNames = companyPrompts.map((prompt) => {
      let companyName = "Unknown Company";
      if (Array.isArray(taxonomy.companies)) {
        const companyData = taxonomy.companies.find(
          (company) => String(company[0]) === String(prompt.company)
        );
        if (companyData && companyData[1]) {
          companyName = companyData[1];
        }
      }
      return {
        id: prompt.id,
        company: prompt.company,
        companyName: companyName,
        prompt_text: prompt.prompt_text
      };
    });

    // Now use loadPrompt to get the actual combined prompt. The connection
    // obtained above is reused instead of requesting another one.
    const promptData = await loadPrompt(mysqlConnection, productsToUse, pool);
    const fullUserPrompt = promptData.userContent + "\n" + JSON.stringify(productsToUse);
    // Prompt length feeds the caller's processing-time estimate
    const promptLength = promptData.systemInstructions.length + fullUserPrompt.length;
    console.log("📝 Generated prompt length:", promptLength);
    console.log("📝 System instructions length:", promptData.systemInstructions.length);
    console.log("📝 User content length:", fullUserPrompt.length);

    // Format the messages as they would be sent to the API
    const apiMessages = [
      { role: "system", content: promptData.systemInstructions },
      { role: "user", content: fullUserPrompt }
    ];

    // Create the response with taxonomy stats
    let categoriesCount = 0;
    try {
      categoriesCount = taxonomy.categories?.length ? countItems(taxonomy.categories) : 0;
    } catch (countError) {
      console.error("Error counting categories:", countError);
      categoriesCount = taxonomy.categories?.length || 0; // Fallback to simple length
    }

    // Number of taxonomy entries whose id is referenced by some product's `field`.
    const countReferenced = (entries, field) =>
      entries?.filter(([id]) =>
        productsToUse.some((p) => Number(p[field]) === Number(id))
      )?.length || 0;

    const response = {
      taxonomyStats: {
        categories: categoriesCount,
        themes: taxonomy.themes?.length || 0,
        colors: taxonomy.colors?.length || 0,
        taxCodes: taxonomy.taxCodes?.length || 0,
        sizeCategories: taxonomy.sizeCategories?.length || 0,
        suppliers: taxonomy.suppliers?.length || 0,
        companies: taxonomy.companies?.length || 0,
        artists: taxonomy.artists?.length || 0,
        // Add filtered counts when products are provided
        filtered: productsToUse
          ? {
              suppliers: countReferenced(taxonomy.suppliers, "supplierid"),
              companies: countReferenced(taxonomy.companies, "company"),
              artists: countReferenced(taxonomy.artists, "artist"),
            }
          : null,
      },
      basePrompt: systemPrompt.prompt_text + "\n\n" + generalPrompt.prompt_text,
      sampleFullPrompt: fullUserPrompt,
      promptLength: promptLength,
      apiFormat: apiMessages,
      promptSources: {
        systemPrompt: {
          id: systemPrompt.id,
          prompt_text: systemPrompt.prompt_text
        },
        generalPrompt: {
          id: generalPrompt.id,
          prompt_text: generalPrompt.prompt_text
        },
        companyPrompts: companyPromptsWithNames
      }
    };
    console.log("Sending response with taxonomy stats:", response.taxonomyStats);
    return response;
  } catch (error) {
    // Record how far we got, then let the route produce the HTTP response.
    console.error("Error generating debug response:", {
      message: error.message,
      taxonomyState: taxonomy ? "loaded" : "failed",
    });
    throw error;
  }
}
// Helper function to count total items in hierarchical structure
/**
 * Counts every node in a taxonomy tree, descendants included.
 *
 * Nodes may be `[id, name, children?]` tuples (the shape getTaxonomyData
 * produces) or objects with an optional `subcategories` array. The earlier
 * version only looked at `.subcategories`, so tuple trees were counted one
 * level deep.
 *
 * @param {Array} items - Sibling nodes at one level of the tree.
 * @returns {number} Total node count; 0 when `items` is not an array.
 */
function countItems(items) {
  if (!Array.isArray(items)) {
    return 0;
  }
  return items.reduce((count, item) => {
    // Tuple nodes carry their children in slot 2; object nodes in `subcategories`.
    const children = Array.isArray(item) ? item[2] : item?.subcategories;
    return count + 1 + (Array.isArray(children) ? countItems(children) : 0);
  }, 0);
}
// Function to fetch and format taxonomy data
/**
 * Reads all taxonomy lookup tables through `connection` and converts them to
 * compact tuple form.
 *
 * Hierarchies are assembled from a one-pass parent index rather than by
 * re-filtering the whole row list for each node, so cost is linear in the
 * number of rows; the output is the same as before.
 *
 * @param {object} connection - Object whose `query(sql)` resolves to `[rows, ...]`.
 * @returns {Promise<object>} `{ categories, themes, colors, taxCodes,
 *   sizeCategories, suppliers, companies, artists, lines, subLines }`.
 *   categories/themes are nested `[id, name, children?]` tuples, colors are
 *   `[color, name, hex_color]`, everything else is `[id, name]`.
 * @throws Re-throws any query error after logging its details.
 */
async function getTaxonomyData(connection) {
  try {
    console.log("Starting taxonomy data fetch...");

    // Fetch categories with hierarchy (one UNION branch per level, 1-4)
    const [categories] = await connection.query(`
SELECT cat_id,name,NULL AS master_cat_id,1 AS level_order FROM product_categories s WHERE type=10 UNION ALL SELECT c.cat_id,c.name,c.master_cat_id,2 AS level_order FROM product_categories c JOIN product_categories s ON c.master_cat_id=s.cat_id WHERE c.type=11 AND s.type=10 UNION ALL SELECT sc.cat_id,sc.name,sc.master_cat_id,3 AS level_order FROM product_categories sc JOIN product_categories c ON sc.master_cat_id=c.cat_id JOIN product_categories s ON c.master_cat_id=s.cat_id WHERE sc.type=12 AND c.type=11 AND s.type=10 UNION ALL SELECT ssc.cat_id,ssc.name,ssc.master_cat_id,4 AS level_order FROM product_categories ssc JOIN product_categories sc ON ssc.master_cat_id=sc.cat_id JOIN product_categories c ON sc.master_cat_id=c.cat_id JOIN product_categories s ON c.master_cat_id=s.cat_id WHERE ssc.type=13 AND sc.type=12 AND c.type=11 AND s.type=10 ORDER BY level_order,cat_id;
`);
    console.log("Categories fetched:", categories.length);

    // Fetch themes with hierarchy (two levels)
    const [themes] = await connection.query(`
SELECT t.cat_id,t.name,null as master_cat_id,1 AS level_order FROM product_categories t WHERE t.type=20 UNION ALL SELECT ts.cat_id,ts.name,ts.master_cat_id,2 AS level_order FROM product_categories ts JOIN product_categories t ON ts.master_cat_id=t.cat_id WHERE ts.type=21 AND t.type=20 ORDER BY level_order,name
`);
    console.log("Themes fetched:", themes.length);

    // Fetch colors
    const [colors] = await connection.query(
      `SELECT color, name, hex_color FROM product_color_list ORDER BY \`order\``
    );
    console.log("Colors fetched:", colors.length);

    // Fetch tax codes
    const [taxCodes] = await connection.query(
      `SELECT tax_code_id, name FROM product_tax_codes ORDER BY name`
    );
    console.log("Tax codes fetched:", taxCodes.length);

    // Fetch size categories
    const [sizeCategories] = await connection.query(
      `SELECT cat_id, name FROM product_categories WHERE type=50 ORDER BY name`
    );
    console.log("Size categories fetched:", sizeCategories.length);

    // Fetch suppliers
    const [suppliers] = await connection.query(`
SELECT supplierid, companyname as name
FROM suppliers
WHERE companyname <> ''
ORDER BY companyname
`);
    console.log("Suppliers fetched:", suppliers.length);

    // Flat id/name lookups that differ only in product_categories.type.
    // `type` is a hard-coded numeric literal, never user input.
    const fetchCategoryType = async (type, label) => {
      const [rows] = await connection.query(`
SELECT cat_id, name
FROM product_categories
WHERE type = ${type}
ORDER BY name
`);
      console.log(`${label} fetched:`, rows.length);
      return rows;
    };
    const companies = await fetchCategoryType(1, "Companies");
    const artists = await fetchCategoryType(40, "Artists");
    const lines = await fetchCategoryType(2, "Lines");
    const subLines = await fetchCategoryType(3, "Sub-lines");

    // Appends `row` to the list stored under `key`, creating it on first use.
    const pushInto = (index, key, row) => {
      const bucket = index.get(key);
      if (bucket) {
        bucket.push(row);
      } else {
        index.set(key, [row]);
      }
    };

    // Format categories into a hierarchical structure. Rows are indexed by
    // (level_order, master_cat_id); Map keys use the same no-coercion matching
    // as the strict comparisons this replaces.
    const formatHierarchy = (items) => {
      const byLevel = new Map();
      for (const item of items) {
        let byParent = byLevel.get(item.level_order);
        if (!byParent) {
          byParent = new Map();
          byLevel.set(item.level_order, byParent);
        }
        pushInto(byParent, item.master_cat_id, item);
      }
      const build = (level, parentId) =>
        (byLevel.get(level)?.get(parentId) ?? []).map((item) => {
          const children = build(level + 1, item.cat_id);
          return children.length > 0
            ? [item.cat_id, item.name, children]
            : [item.cat_id, item.name];
        });
      return build(1, null);
    };

    // Format themes similarly but with only two levels
    const formatThemes = (items) => {
      const byParent = new Map();
      for (const item of items) {
        pushInto(byParent, item.master_cat_id, item);
      }
      return items
        .filter((item) => item.level_order === 1)
        .map((item) => {
          const subthemes = (byParent.get(item.cat_id) ?? []).map((subitem) => [
            subitem.cat_id,
            subitem.name,
          ]);
          return subthemes.length > 0
            ? [item.cat_id, item.name, subthemes]
            : [item.cat_id, item.name];
        });
    };

    // Log first item of each taxonomy category to check structure
    console.log("Sample category:", categories.length > 0 ? categories[0] : "No categories");
    console.log("Sample theme:", themes.length > 0 ? themes[0] : "No themes");
    console.log("Sample color:", colors.length > 0 ? colors[0] : "No colors");

    const toIdName = (row) => [row.cat_id, row.name];
    const formattedData = {
      categories: formatHierarchy(categories),
      themes: formatThemes(themes),
      colors: colors.map((c) => [c.color, c.name, c.hex_color]),
      taxCodes: (taxCodes || []).map((tc) => [tc.tax_code_id, tc.name]),
      sizeCategories: (sizeCategories || []).map(toIdName),
      suppliers: suppliers.map((s) => [s.supplierid, s.name]),
      companies: companies.map(toIdName),
      artists: artists.map(toIdName),
      lines: lines.map(toIdName),
      subLines: subLines.map(toIdName),
    };

    // Check the formatted structure
    console.log("Formatted categories count:", formattedData.categories.length);
    console.log("Formatted themes count:", formattedData.themes.length);
    console.log("Formatted colors count:", formattedData.colors.length);
    return formattedData;
  } catch (error) {
    console.error("Error fetching taxonomy data:", error);
    console.error("Full error details:", {
      message: error.message,
      stack: error.stack,
      code: error.code,
      errno: error.errno,
      sqlMessage: error.sqlMessage,
      sqlState: error.sqlState,
      sql: error.sql
    });
    // Instead of silently returning empty arrays, throw the error to be handled by the caller
    throw error;
  }
}
// Load prompts from database and inject taxonomy data
/**
 * Builds the system instructions and user content for a bulk validation run.
 *
 * The user content is the general prompt, any company-specific prompts for
 * companies referenced by the products, and a taxonomy section. The taxonomy
 * section contains the full category/theme/color/tax-code/size lists, plus
 * only those suppliers/companies/artists/lines/sub-lines that the products
 * actually reference.
 *
 * Changes from the earlier version:
 *  - Product keys are mapped through a Map, so keys that collide with
 *    Object.prototype members (e.g. "toString", "constructor") no longer
 *    crash the collection step.
 *  - Arguments are validated before any database work is done.
 *
 * @param {object} connection - MySQL connection passed to getTaxonomyData.
 * @param {object[]|null} [productsToValidate=null] - Products; must be a non-empty array.
 * @param {object|null} [appPool=null] - Pool with `query()` resolving to `{ rows }`, used for ai_prompts.
 * @returns {Promise<{systemInstructions: string, userContent: string}>}
 * @throws {Error} When products are missing/empty, the pool is missing, a
 *   required prompt row is absent, or any query fails.
 */
async function loadPrompt(connection, productsToValidate = null, appPool = null) {
  try {
    // Products are required for validation; fail fast before touching the DB.
    if (!Array.isArray(productsToValidate) || productsToValidate.length === 0) {
      throw new Error("Products are required for prompt generation");
    }
    // Use the provided pool parameter instead of global.app
    const pool = appPool;
    if (!pool) {
      console.warn("⚠️ Local database pool not available for prompts");
      throw new Error("Database connection not available");
    }

    // Get taxonomy data using the provided MySQL connection
    const taxonomy = await getTaxonomyData(connection);

    // Fetches the text of the single global (company IS NULL) prompt of a type.
    const fetchRequiredPromptText = async (promptType) => {
      const result = await pool.query(
        `SELECT * FROM ai_prompts
         WHERE prompt_type = $1 AND company IS NULL`,
        [promptType]
      );
      if (result.rows.length === 0) {
        console.error(`❌ No ${promptType} prompt found in database`);
        throw new Error(`Missing required AI prompt: ${promptType}. Please add it in Settings > AI Validation Prompts.`);
      }
      console.log(`📝 Loaded ${promptType} prompt from database`);
      return result.rows[0].prompt_text;
    };

    const systemInstructions = await fetchRequiredPromptText("bulk_validation_system");
    const basePrompt = await fetchRequiredPromptText("bulk_validation_general");

    // Fetch company-specific prompts for the companies these products reference
    let companyPrompts = [];
    const promptCompanyIds = new Set();
    for (const product of productsToValidate) {
      if (product.company) {
        promptCompanyIds.add(String(product.company));
      }
    }
    if (promptCompanyIds.size > 0) {
      console.log(`🔍 Found ${promptCompanyIds.size} unique companies in products:`, Array.from(promptCompanyIds));
      const companyPromptsResult = await pool.query(
        `SELECT * FROM ai_prompts
         WHERE prompt_type = 'bulk_validation_company_specific'
         AND company = ANY($1)`,
        [Array.from(promptCompanyIds)]
      );
      companyPrompts = companyPromptsResult.rows;
      console.log(`📝 Loaded ${companyPrompts.length} bulk_validation_company_specific prompts`);
    }

    // Combine prompts - start with the general prompt
    let combinedPrompt = basePrompt;
    // Add any company-specific prompts with annotations
    if (companyPrompts.length > 0) {
      combinedPrompt += "\n\n--- COMPANY-SPECIFIC INSTRUCTIONS ---\n";
      for (const prompt of companyPrompts) {
        // Find company name from taxonomy ([id, name] tuples)
        let companyName = "Unknown Company";
        if (Array.isArray(taxonomy.companies)) {
          const companyData = taxonomy.companies.find(
            (company) => String(company[0]) === String(prompt.company)
          );
          if (companyData && companyData[1]) {
            companyName = companyData[1];
          }
        }
        combinedPrompt += `\n[SPECIFIC TO COMPANY: ${companyName} (ID: ${prompt.company})]:\n${prompt.prompt_text}\n`;
      }
      combinedPrompt += "\n--- END COMPANY-SPECIFIC INSTRUCTIONS ---\n";
    }

    console.log("Creating filtered prompt for products:", productsToValidate);

    // Extract unique values from products for non-core attributes
    const uniqueValues = {
      supplierIds: new Set(),
      companyIds: new Set(),
      artistIds: new Set(),
      lineIds: new Set(),
      subLineIds: new Set(),
    };
    // Product field name -> bucket in uniqueValues. A Map (not an object
    // literal) so arbitrary product keys cannot resolve to inherited members.
    const fieldToBucket = new Map([
      ["supplierid", "supplierIds"],
      ["supplier", "supplierIds"],
      ["company", "companyIds"],
      ["artist", "artistIds"],
      ["line", "lineIds"],
      ["subline", "subLineIds"],
    ]);
    // Collect any values that exist in the products
    for (const product of productsToValidate) {
      for (const [key, value] of Object.entries(product)) {
        if (value === undefined || value === null) continue;
        const bucket = fieldToBucket.get(key);
        if (bucket) {
          uniqueValues[bucket].add(Number(value));
        }
      }
    }
    console.log("Unique values collected:", {
      suppliers: Array.from(uniqueValues.supplierIds),
      companies: Array.from(uniqueValues.companyIds),
      artists: Array.from(uniqueValues.artistIds),
      lines: Array.from(uniqueValues.lineIds),
      subLines: Array.from(uniqueValues.subLineIds),
    });

    // Keeps only the [id, ...] tuples whose id appears in `wanted`.
    const keepReferenced = (entries, wanted) =>
      entries.filter(([id]) => wanted.has(Number(id)));

    // Create mixed taxonomy with filtered non-core data and full core data
    const mixedTaxonomy = {
      // Keep full data for core attributes
      categories: taxonomy.categories,
      themes: taxonomy.themes,
      colors: taxonomy.colors,
      taxCodes: taxonomy.taxCodes,
      sizeCategories: taxonomy.sizeCategories,
      // For non-core data, only include items that are actually used
      suppliers: keepReferenced(taxonomy.suppliers, uniqueValues.supplierIds),
      companies: keepReferenced(taxonomy.companies, uniqueValues.companyIds),
      artists: keepReferenced(taxonomy.artists, uniqueValues.artistIds),
      lines: keepReferenced(taxonomy.lines, uniqueValues.lineIds),
      subLines: keepReferenced(taxonomy.subLines, uniqueValues.subLineIds),
    };
    console.log("Filtered taxonomy counts:", {
      suppliers: mixedTaxonomy.suppliers.length,
      companies: mixedTaxonomy.companies.length,
      artists: mixedTaxonomy.artists.length,
      lines: mixedTaxonomy.lines.length,
      subLines: mixedTaxonomy.subLines.length,
    });

    // Format taxonomy data for the prompt. Core sections are always present;
    // "Used In This Data" sections appear only when non-empty.
    const coreSections = [
      ["All Available Categories:", mixedTaxonomy.categories],
      ["All Available Themes:", mixedTaxonomy.themes],
      ["All Available Colors:", mixedTaxonomy.colors],
      ["All Available Tax Codes:", mixedTaxonomy.taxCodes],
      ["All Available Size Categories:", mixedTaxonomy.sizeCategories],
    ]
      .map(([heading, data]) => `\n${heading}\n${JSON.stringify(data)}`)
      .join("");
    const usedSections = [
      ["Suppliers", mixedTaxonomy.suppliers],
      ["Companies", mixedTaxonomy.companies],
      ["Artists", mixedTaxonomy.artists],
      ["Lines", mixedTaxonomy.lines],
      ["Sub-Lines", mixedTaxonomy.subLines],
    ]
      .filter(([, data]) => data.length)
      .map(([label, data]) => `\n\n${label} Used In This Data:\n${JSON.stringify(data)}`)
      .join("");
    const taxonomySection =
      `${coreSections}${usedSections}\n----------Here is the product data to validate----------`;

    // Return both system instructions and user content separately
    return {
      systemInstructions,
      userContent: combinedPrompt + "\n" + taxonomySection
    };
  } catch (error) {
    console.error("Error loading prompt:", error);
    throw error; // Re-throw to be handled by the calling function
  }
}
router.post("/validate", async (req, res) => {
try {
const { products } = req.body;
const startTime = new Date(); // Track start time for performance metrics
console.log("🔍 Received products for validation:", {
isArray: Array.isArray(products),
length: products?.length,
firstProduct: products?.[0],
lastProduct: products?.[products?.length - 1],
});
if (!Array.isArray(products)) {
console.error("❌ Invalid input: products is not an array");
return res.status(400).json({ error: "Products must be an array" });
}
if (products.length === 0) {
console.error("❌ Invalid input: products array is empty");
return res.status(400).json({ error: "Products array cannot be empty" });
}
let ssh = null;
let connection = null;
let promptLength = 0; // Track prompt length for performance metrics
try {
// Use the optimized connection utility instead of direct SSH tunnel
console.log("🔄 Setting up connection to production database using optimized connection...");
const { ssh: connSsh, connection: connDB } = await getDbConnection();
ssh = connSsh;
connection = connDB;
console.log("🔄 MySQL connection established successfully");
// Load the prompt with the products data to filter taxonomy
console.log("🔄 Loading prompt with filtered taxonomy...");
const promptData = await loadPrompt(connection, products, req.app.locals.pool);
const fullUserPrompt = promptData.userContent + "\n" + JSON.stringify(products);
promptLength = promptData.systemInstructions.length + fullUserPrompt.length; // Store prompt length for performance metrics
console.log("📝 Generated prompt length:", promptLength);
console.log("📝 System instructions length:", promptData.systemInstructions.length);
console.log("📝 User content length:", fullUserPrompt.length);
console.log("🤖 Sending request to OpenAI Responses API...");
// GPT-5 Responses API Configuration:
// - Using "gpt-5" (reasoning model) for complex product validation
// - reasoning.effort: "medium" balances quality and speed (minimal, low, medium, high)
// - text.verbosity: "medium" provides balanced output detail (low, medium, high)
// - max_output_tokens: 20000 ensures space for large product batches
// Note: Responses API is the recommended endpoint for GPT-5 models
const completion = await createResponsesCompletion({
model: "gpt-5.2",
input: [
{
role: "developer",
content: promptData.systemInstructions,
},
{
role: "user",
content: fullUserPrompt,
},
],
reasoning: {
effort: "medium"
},
text: {
verbosity: "medium",
format: AI_VALIDATION_TEXT_FORMAT,
},
max_output_tokens: 50000,
});
console.log("✅ Received response from OpenAI Responses API");
// Responses API structure: response has 'output' array with message objects
const rawResponse = extractResponseText(completion);
console.log("📄 Raw AI response length:", rawResponse ? rawResponse.length : 0);
if (!rawResponse) {
throw new Error("OpenAI response did not include any text output");
}
const responseModel = completion.model;
const usage = completion.usage || {};
// GPT-5 Responses API provides detailed token usage including reasoning tokens
const tokenUsageSummary = {
prompt: usage.input_tokens ?? usage.prompt_tokens ?? null,
completion: usage.output_tokens ?? usage.completion_tokens ?? null,
total: usage.total_tokens ?? null,
// GPT-5 reasoning tokens are in output_tokens_details
reasoning: usage.output_tokens_details?.reasoning_tokens ?? usage.completion_tokens_details?.reasoning_tokens ?? null,
// Also capture text generation tokens separately from reasoning
textGeneration: usage.output_tokens_details?.text_generation_tokens ?? usage.completion_tokens_details?.text_generation_tokens ?? null,
cachedPrompt: usage.input_tokens_details?.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? null,
// Capture audio tokens if present (future GPT-5 feature)
audioTokens: usage.output_tokens_details?.audio_tokens ?? usage.completion_tokens_details?.audio_tokens ?? null,
};
// Extract reasoning_effort and verbosity that were actually applied
const reasoningEffortApplied = completion.reasoning?.effort || "medium";
const verbosityApplied = completion.text?.verbosity || "medium";
console.log("📊 Token usage summary:", tokenUsageSummary);
console.log("🤖 Model dispatched:", responseModel);
console.log("🧠 Reasoning effort applied:", reasoningEffortApplied);
console.log("📝 Verbosity applied:", verbosityApplied);
try {
const normalizedResponse = normalizeJsonResponse(rawResponse);
const aiResponse = JSON.parse(normalizedResponse);
console.log(
"🔄 Parsed AI response with keys:",
Object.keys(aiResponse)
);
// Merge AI changes back into original products
// AI now only returns changed products and changed fields
const mergedProducts = products.map((original, index) => ({ ...original }));
const changeDetails = [];
if (aiResponse.correctedData && Array.isArray(aiResponse.correctedData)) {
console.log("📊 Processing AI changes - received", aiResponse.correctedData.length, "products with changes");
// Process each changed product from AI
aiResponse.correctedData.forEach((changedProduct) => {
// Find the matching original product using stable identifiers in priority order
// Priority: upc > supplier_no > notions_no
// These fields should not change during validation
const identifiers = ['upc', 'supplier_no', 'notions_no'];
let matchedIndex = -1;
let matchedBy = null;
for (const identifier of identifiers) {
if (changedProduct[identifier] !== undefined && changedProduct[identifier] !== null && changedProduct[identifier] !== '') {
matchedIndex = products.findIndex(
(p) => p[identifier] !== undefined &&
p[identifier] !== null &&
p[identifier] !== '' &&
String(p[identifier]).trim() === String(changedProduct[identifier]).trim()
);
if (matchedIndex !== -1) {
matchedBy = identifier;
console.log(`✓ Matched product by ${identifier}:`, changedProduct[identifier]);
break;
}
}
}
// If no identifier match found, log an error with details
if (matchedIndex === -1) {
console.error("❌ Could not match changed product to original. Product identifiers:", {
upc: changedProduct.upc,
supplier_no: changedProduct.supplier_no,
notions_no: changedProduct.notions_no
});
return;
}
const original = products[matchedIndex];
const productChanges = {
productIndex: matchedIndex,
title: original.name || original.title || `Product ${matchedIndex + 1}`,
changes: []
};
// Apply each changed field to the merged product
Object.keys(changedProduct).forEach((key) => {
// Check if the value actually changed
if (JSON.stringify(original[key]) !== JSON.stringify(changedProduct[key])) {
console.log(`\nProduct ${matchedIndex + 1} - Field ${key}:`);
console.log(` - Original: ${JSON.stringify(original[key])}`);
console.log(` - Corrected: ${JSON.stringify(changedProduct[key])}`);
// Apply the change to merged product
mergedProducts[matchedIndex][key] = changedProduct[key];
// Track the change
productChanges.changes.push({
field: key,
original: original[key],
corrected: changedProduct[key]
});
}
});
// Only add to changeDetails if there were actual changes
if (productChanges.changes.length > 0) {
changeDetails.push(productChanges);
}
});
console.log(`📊 Applied changes to ${changeDetails.length} products`);
}
// Replace aiResponse.correctedData with the fully merged product array
aiResponse.correctedData = mergedProducts;
// Record performance metrics after successful validation
const endTime = new Date();
let performanceMetrics = {
promptLength,
productCount: products.length,
model: responseModel,
tokenUsage: tokenUsageSummary,
reasoningTokens: tokenUsageSummary.reasoning,
reasoningEffort: reasoningEffortApplied,
verbosity: verbosityApplied,
};
try {
// Use the local PostgreSQL pool from the app instead of the MySQL connection
const pool = req.app.locals.pool;
if (!pool) {
console.warn("⚠️ Local database pool not available for recording metrics");
return;
}
try {
// Insert performance data into the local PostgreSQL database
await pool.query(
`INSERT INTO ai_validation_performance
(prompt_length, product_count, start_time, end_time)
VALUES ($1, $2, $3, $4)`,
[
promptLength,
products.length,
startTime.toISOString(),
endTime.toISOString()
]
);
console.log("📊 Performance metrics inserted into database");
// Query for average processing time based on similar prompt lengths
try {
const rateResults = await pool.query(
`SELECT
AVG(duration_seconds / prompt_length) as avg_rate_per_char,
COUNT(*) as sample_count,
AVG(duration_seconds) as avg_duration
FROM ai_validation_performance`
);
if (rateResults.rows && rateResults.rows[0] && rateResults.rows[0].avg_rate_per_char) {
const rate = rateResults.rows[0].avg_rate_per_char;
performanceMetrics.avgRate = rate;
performanceMetrics.estimatedSeconds = Math.round(rate * promptLength);
performanceMetrics.sampleCount = rateResults.rows[0].sample_count;
performanceMetrics.calculationMethod = "rate-based";
}
console.log("📊 Performance metrics with rate calculation:", performanceMetrics);
} catch (queryError) {
console.error("⚠️ Failed to query performance metrics:", queryError);
}
} catch (insertError) {
console.error("⚠️ Failed to insert performance metrics:", insertError);
// Check if table doesn't exist and log a more helpful message
if (insertError.code === '42P01') {
console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script.");
}
}
} catch (metricError) {
// Don't fail the request if metrics recording fails
console.error("⚠️ Failed to record performance metrics:", metricError);
}
// Get sources of the prompts for tracking
let promptSources = null;
try {
// Use the local PostgreSQL pool from the app
const pool = req.app.locals.pool;
if (!pool) {
console.warn("⚠️ Local database pool not available for prompt sources");
} else {
// Get system prompt for bulk validation
const systemPromptResult = await pool.query(`
SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_system' AND company IS NULL
`);
// Get general prompt for bulk validation
const generalPromptResult = await pool.query(`
SELECT * FROM ai_prompts WHERE prompt_type = 'bulk_validation_general' AND company IS NULL
`);
// Extract unique company IDs from products
const companyIds = new Set();
products.forEach(product => {
if (product.company) {
companyIds.add(String(product.company));
}
});
let companyPrompts = [];
if (companyIds.size > 0) {
// Fetch company-specific prompts for bulk validation
const companyPromptsResult = await pool.query(`
SELECT * FROM ai_prompts
WHERE prompt_type = 'bulk_validation_company_specific'
AND company = ANY($1)
`, [Array.from(companyIds)]);
companyPrompts = companyPromptsResult.rows;
}
// Format company prompts for response
// Note: Company names would require re-fetching taxonomy data
// For now, we include company ID only
const companyPromptsWithNames = companyPrompts.map(prompt => ({
id: prompt.id,
company: prompt.company,
prompt_text: prompt.prompt_text
}));
// Set prompt sources
if (generalPromptResult.rows.length > 0 && systemPromptResult.rows.length > 0) {
const generalPrompt = generalPromptResult.rows[0];
const systemPrompt = systemPromptResult.rows[0];
promptSources = {
systemPrompt: {
id: systemPrompt.id,
prompt_text: systemPrompt.prompt_text
},
generalPrompt: {
id: generalPrompt.id,
prompt_text: generalPrompt.prompt_text
},
companyPrompts: companyPromptsWithNames
};
}
}
} catch (promptSourceError) {
console.error("⚠️ Error getting prompt sources:", promptSourceError);
// Don't fail the entire validation if just prompt sources retrieval fails
}
// Include prompt sources in the response
res.json({
success: true,
...aiResponse,
changeDetails,
performanceMetrics:
performanceMetrics || {
// Fallback: calculate a simple estimate
promptLength,
processingTimeSeconds: Math.max(15, Math.round(promptLength / 1000)),
isEstimate: true,
productCount: products.length,
model: responseModel,
tokenUsage: tokenUsageSummary,
reasoningTokens: tokenUsageSummary.reasoning,
reasoningEffort: reasoningEffortApplied,
verbosity: verbosityApplied,
},
promptSources,
model: responseModel,
tokenUsage: tokenUsageSummary,
reasoningEffort: reasoningEffortApplied,
verbosity: verbosityApplied,
});
} catch (parseError) {
console.error("❌ Error parsing AI response:", parseError);
console.error("Raw response that failed to parse:", rawResponse);
res.status(500).json({
success: false,
error: "Error parsing AI response: " + parseError.message,
});
}
} catch (openaiError) {
console.error("❌ OpenAI API Error:", openaiError);
res.status(500).json({
success: false,
error: "OpenAI API Error: " + openaiError.message,
});
}
} catch (error) {
console.error("❌ AI Validation Error:", error);
console.error("Error details:", {
name: error.name,
message: error.message,
stack: error.stack,
});
res.status(500).json({
success: false,
error: error.message || "Error during AI validation",
});
} finally {
try {
await closeAllConnections();
} catch (closeError) {
console.error("⚠️ Failed to close DB connections after validation request:", closeError);
}
}
});
// Diagnostic endpoint: runs a handful of small taxonomy queries against the
// database returned by getDbConnection() and reports, per query, whether it
// succeeded, how many rows came back, and the first row as a sample.
// A failing query does not abort the request; it is reported in its own slot.
// NOTE(review): the 500 response below includes error.stack — confirm this
// route is not reachable by untrusted clients.
router.get("/test-taxonomy", async (req, res) => {
  // [result key, SQL] pairs, executed sequentially in this order.
  const probes = [
    ["categories", "\nSELECT cat_id, name FROM product_categories WHERE type=10 LIMIT 5\n"],
    ["themes", "\nSELECT cat_id, name FROM product_categories WHERE type=20 LIMIT 5\n"],
    ["colors", "\nSELECT color, name, hex_color FROM product_color_list ORDER BY `order` LIMIT 5\n"],
  ];

  try {
    console.log("Test taxonomy endpoint called");

    try {
      // Use the optimized connection utility instead of a direct SSH tunnel
      console.log("🔄 Setting up connection to production database using optimized connection...");
      const { connection: db } = await getDbConnection();
      console.log("MySQL connection established successfully for test");

      const results = {};
      for (const [label, sql] of probes) {
        try {
          const [rows] = await db.query(sql);
          results[label] = {
            success: true,
            count: rows.length,
            sample: rows.length > 0 ? rows[0] : null,
          };
        } catch (error) {
          // Record the failure for this probe only and keep going
          results[label] = {
            success: false,
            error: error.message,
            sqlMessage: error.sqlMessage,
          };
        }
      }

      return res.json({
        message: "Test taxonomy queries executed",
        results,
        timestamp: new Date().toISOString(),
      });
    } finally {
      // Always release connections, whether the probes ran or setup failed
      try {
        await closeAllConnections();
      } catch (closeError) {
        console.error("⚠️ Failed to close DB connections after test-taxonomy request:", closeError);
      }
    }
  } catch (error) {
    console.error("Test taxonomy endpoint error:", error);
    return res.status(500).json({
      error: error.message,
      stack: error.stack,
    });
  }
});
// Export the router. The helper functions declared after this line are
// function declarations, so they are hoisted and callable from the handlers above.
module.exports = router;
/**
 * Pull the plain text out of a model response object.
 *
 * Text is gathered, in order of preference, from:
 *   1. every entry of `response.output` followed by every entry of
 *      `response.outputs` (using the entry's `content` when present,
 *      otherwise the entry itself);
 *   2. `response.output_text`, if nothing was found above and it is a string;
 *   3. `response.choices[0].message.content`, if still nothing was found.
 *
 * @param {object|null|undefined} response - Response object to inspect.
 * @returns {string} All collected segments concatenated and trimmed; "" when
 *   the response is falsy or holds no text.
 */
function extractResponseText(response) {
  if (!response) {
    return "";
  }

  const items = [
    ...(Array.isArray(response.output) ? response.output : []),
    ...(Array.isArray(response.outputs) ? response.outputs : []),
  ];

  const pieces = [];
  for (const item of items) {
    pieces.push(...collectTextSegments(item?.content ?? item));
  }

  // Fallbacks are only consulted while nothing has been collected yet
  if (pieces.length === 0 && typeof response.output_text === "string") {
    pieces.push(response.output_text);
  }
  if (pieces.length === 0 && response.choices?.length) {
    const fromChoices = collectTextSegments(response.choices?.[0]?.message?.content);
    pieces.push(...fromChoices);
  }

  return pieces.join("").trim();
}
/**
 * Recursively gather text fragments from an arbitrarily shaped value.
 *
 * - null / undefined                -> no fragments
 * - string / number / boolean       -> the value converted with String()
 * - array                           -> fragments of each element, in order
 * - object                          -> `text`, `content`, `output_text`
 *                                      (string, or array walked recursively),
 *                                      then `value` and `data` (string only)
 * - anything else (function, etc.)  -> no fragments
 *
 * @param {*} node - Value to inspect.
 * @returns {string[]} Fragments in discovery order.
 */
function collectTextSegments(node) {
  if (node === null || node === undefined) {
    return [];
  }

  switch (typeof node) {
    case "string":
    case "number":
    case "boolean":
      return [String(node)];
    case "object":
      break;
    default:
      return [];
  }

  if (Array.isArray(node)) {
    return node.flatMap((child) => collectTextSegments(child));
  }

  const found = [];

  // Fields that may hold either a string or a nested list of nodes
  for (const key of ["text", "content", "output_text"]) {
    const field = node[key];
    if (typeof field === "string") {
      found.push(field);
    } else if (Array.isArray(field)) {
      found.push(...field.flatMap((child) => collectTextSegments(child)));
    }
  }

  // Fields that only count when they are plain strings
  for (const key of ["value", "data"]) {
    const field = node[key];
    if (typeof field === "string") {
      found.push(field);
    }
  }

  return found;
}
/**
 * Prepare raw model output for JSON.parse.
 *
 * Strips a surrounding markdown code fence (the opening fence line, and
 * everything from the last closing fence onward), trims whitespace, and then
 * hands the result to repairTruncatedJson.
 *
 * @param {*} text - Raw response text.
 * @returns {*} The cleaned string; non-string or empty input is returned as-is.
 */
function normalizeJsonResponse(text) {
  if (typeof text !== 'string' || text.length === 0) {
    return text;
  }

  const FENCE = '```';
  let body = text.trim();

  if (body.startsWith(FENCE)) {
    // Drop the whole opening fence line (which may carry a language tag);
    // with no line break at all, only the fence marker itself is removed.
    const newlineAt = body.indexOf('\n');
    body = newlineAt === -1 ? body.slice(FENCE.length) : body.slice(newlineAt + 1);

    const closingAt = body.lastIndexOf(FENCE);
    if (closingAt !== -1) {
      body = body.slice(0, closingAt);
    }
    body = body.trim();
  }

  // Handles responses that were cut off mid-stream
  return repairTruncatedJson(body);
}
/**
 * Attempt to repair truncated JSON by closing whatever was left open.
 * This is a common issue when AI responses hit token limits.
 *
 * The text is scanned once, tracking string/escape state and a stack of the
 * closers still owed. Closers are then appended innermost-first so that mixed
 * nesting such as `{"items":[{"a":1` becomes `{"items":[{"a":1}]}`.
 * (Appending all `]` before all `}` based on counts alone produces invalid
 * output whenever an object is nested inside an array.)
 *
 * Additional small repairs:
 *   - an unterminated string is closed (a lone trailing backslash is dropped
 *     first, otherwise it would escape the quote being added);
 *   - a dangling trailing comma is removed.
 *
 * @param {*} text - Candidate JSON text.
 * @returns {*} `text` unchanged when it is falsy, not a string, already valid
 *   JSON, or cannot be repaired; otherwise the repaired (trimmed) JSON string.
 */
function repairTruncatedJson(text) {
  if (!text || typeof text !== 'string') return text;

  // First, try parsing as-is
  try {
    JSON.parse(text);
    return text; // Valid JSON, no repair needed
  } catch (e) {
    // JSON is invalid, fall through and try to repair
  }

  let repaired = text.trim();

  // Closers still owed, outermost first / innermost last
  const pendingClosers = [];
  let inString = false;
  let escapeNext = false;

  for (let i = 0; i < repaired.length; i++) {
    const char = repaired[i];

    if (escapeNext) {
      escapeNext = false;
      continue;
    }

    if (inString) {
      if (char === '\\') {
        escapeNext = true;
      } else if (char === '"') {
        inString = false;
      }
      continue;
    }

    if (char === '"') {
      inString = true;
    } else if (char === '{') {
      pendingClosers.push('}');
    } else if (char === '[') {
      pendingClosers.push(']');
    } else if (char === '}' || char === ']') {
      // Only a matching closer pops; a stray one is left for the final
      // JSON.parse to reject, which makes us return the original text.
      if (pendingClosers[pendingClosers.length - 1] === char) {
        pendingClosers.pop();
      }
    }
  }

  if (inString) {
    // Cut off right after a backslash: drop it so it cannot escape our quote
    if (escapeNext) {
      repaired = repaired.slice(0, -1);
    }
    repaired += '"';
  } else if (repaired.endsWith(',')) {
    // Cut off right after a separator: `[1,2,` -> `[1,2`
    repaired = repaired.slice(0, -1);
  }

  // Close innermost structures first
  for (let i = pendingClosers.length - 1; i >= 0; i--) {
    repaired += pendingClosers[i];
  }

  // Try parsing the repaired JSON
  try {
    JSON.parse(repaired);
    console.log('✅ Successfully repaired truncated JSON');
    return repaired;
  } catch (e) {
    // Repair failed, return original and let the caller handle the error
    console.log('⚠️ JSON repair attempt failed:', e.message);
    return text;
  }
}