1024 lines
36 KiB
JavaScript
1024 lines
36 KiB
JavaScript
const express = require("express");
|
|
const router = express.Router();
|
|
const OpenAI = require("openai");
|
|
const fs = require("fs").promises;
|
|
const path = require("path");
|
|
const dotenv = require("dotenv");
|
|
const mysql = require('mysql2/promise');
|
|
const { Client } = require('ssh2');
|
|
|
|
// Load environment variables before anything below reads process.env.
dotenv.config({ path: path.join(__dirname, "../../.env") });

// Warn BEFORE constructing the client so the message is always visible.
// NOTE(review): recent versions of the openai SDK appear to throw from the
// constructor when no API key is supplied, which would make a warning placed
// after the constructor unreachable — confirm against the installed version.
if (!process.env.OPENAI_API_KEY) {
  console.error("Warning: OPENAI_API_KEY is not set in environment variables");
}

// Shared OpenAI client used by the route handlers in this module.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});
|
|
|
|
/**
 * Opens an SSH connection to the production host and forwards a TCP stream
 * to the production database through it.
 *
 * Connection settings are read from the PROD_SSH_* and PROD_DB_* environment
 * variables. The private key, when PROD_SSH_KEY_PATH is set, is read
 * asynchronously so the event loop is not blocked.
 *
 * @returns {Promise<{ssh: object, stream: object, dbConfig: object}>}
 *   The connected SSH client (the caller must call `ssh.end()`), the forwarded
 *   stream to hand to `mysql.createConnection({ stream })`, and the database
 *   settings to spread into that same call.
 * @throws Rejects when the key file cannot be read, the SSH connection fails,
 *   or the port forward cannot be opened.
 */
async function setupSshTunnel() {
  const keyPath = process.env.PROD_SSH_KEY_PATH;

  const sshConfig = {
    host: process.env.PROD_SSH_HOST,
    // Environment values are strings; coerce so the port is always numeric.
    port: Number(process.env.PROD_SSH_PORT) || 22,
    username: process.env.PROD_SSH_USER,
    privateKey: keyPath ? await fs.readFile(keyPath) : undefined,
    compress: true,
  };

  const dbConfig = {
    host: process.env.PROD_DB_HOST || 'localhost',
    user: process.env.PROD_DB_USER,
    password: process.env.PROD_DB_PASSWORD,
    database: process.env.PROD_DB_NAME,
    port: Number(process.env.PROD_DB_PORT) || 3306,
    timezone: 'Z',
  };

  return new Promise((resolve, reject) => {
    const ssh = new Client();

    ssh.on('error', (err) => {
      console.error('SSH connection error:', err);
      reject(err);
    });

    ssh.on('ready', () => {
      ssh.forwardOut('127.0.0.1', 0, dbConfig.host, dbConfig.port, (err, stream) => {
        if (err) {
          // The forward failed but the SSH session is open: close it so the
          // caller (who never receives the client) does not leak a connection.
          ssh.end();
          reject(err);
          return;
        }
        resolve({ ssh, stream, dbConfig });
      });
    });

    ssh.connect(sshConfig);
  });
}
|
|
|
|
/**
 * POST /debug — returns the prompt that would be sent for the given products,
 * together with taxonomy statistics and an estimated processing time.
 *
 * Request body: `{ products: Array<object> }` (non-empty).
 * Responses: 400 on invalid input, 500 on failure, otherwise the object
 * produced by generateDebugResponse, optionally extended with
 * `estimatedProcessingTime`.
 */
router.post("/debug", async (req, res) => {
  try {
    console.log("Debug POST endpoint called");

    const { products } = req.body;

    console.log("Received products for debug:", {
      isArray: Array.isArray(products),
      length: products?.length,
      firstProduct: products?.[0],
      lastProduct: products?.[products?.length - 1],
    });

    if (!Array.isArray(products)) {
      console.error("Invalid input: products is not an array");
      return res.status(400).json({ error: "Products must be an array" });
    }

    if (products.length === 0) {
      console.error("Invalid input: products array is empty");
      return res.status(400).json({ error: "Products array cannot be empty" });
    }

    // Clean the products array to remove any internal fields
    const cleanedProducts = products.map((product) => {
      const { __errors, __index, ...cleanProduct } = product;
      return cleanProduct;
    });

    console.log("Processing debug request with cleaned products:", {
      length: cleanedProducts.length,
      sample: cleanedProducts[0],
    });

    try {
      const debugResponse = await generateDebugResponse(cleanedProducts, res);

      // generateDebugResponse writes its own error responses directly to
      // `res`. If it already did, sending again would raise
      // "headers already sent", so stop here.
      if (res.headersSent) {
        return;
      }

      // Get estimated processing time based on prompt length
      if (debugResponse && debugResponse.promptLength) {
        try {
          // Use the pool from the app
          const pool = req.app.locals.pool;

          if (!pool) {
            // Estimates are optional: skip them but still answer the request.
            console.warn("⚠️ Local database pool not available for time estimates");
          } else {
            try {
              // Instead of looking for similar prompt lengths, calculate an average processing rate
              const rateResults = await pool.query(
                `SELECT
AVG(duration_seconds / prompt_length) as avg_rate_per_char,
COUNT(*) as sample_count,
AVG(duration_seconds) as avg_duration
FROM ai_validation_performance`
              );

              const rateRow = rateResults.rows && rateResults.rows[0];

              if (rateRow && rateRow.avg_rate_per_char) {
                // Calculate estimated time based on the rate and current prompt length
                const rate = rateRow.avg_rate_per_char;
                const estimatedSeconds = Math.max(15, Math.round(rate * debugResponse.promptLength));

                debugResponse.estimatedProcessingTime = {
                  seconds: estimatedSeconds,
                  sampleCount: rateRow.sample_count || 0,
                  avgRate: rate,
                  calculationMethod: "rate-based"
                };
                console.log("📊 Calculated time estimate using rate-based method:", {
                  rate: rate,
                  promptLength: debugResponse.promptLength,
                  estimatedSeconds: estimatedSeconds,
                  sampleCount: rateRow.sample_count
                });
              } else {
                // Fallback: Calculate a simple estimate based on prompt length (1 second per 1000 characters)
                const estimatedSeconds = Math.max(15, Math.round(debugResponse.promptLength / 1000));
                console.log("📊 No rate data available, using fallback calculation");
                debugResponse.estimatedProcessingTime = {
                  seconds: estimatedSeconds,
                  sampleCount: 0,
                  isEstimate: true,
                  calculationMethod: "fallback"
                };
                console.log("📊 Fallback time estimate:", debugResponse.estimatedProcessingTime);
              }
            } catch (queryError) {
              console.error("⚠️ Failed to query performance metrics:", queryError);
              // Check if table doesn't exist and log a more helpful message
              if (queryError.code === '42P01') {
                console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script.");
              }
            }
          }
        } catch (timeEstimateError) {
          console.error("Error getting time estimate:", timeEstimateError);
          // Don't fail the request if time estimate fails
        }
      }

      return res.json(debugResponse);
    } catch (generateError) {
      console.error("Error generating debug response:", generateError);
      if (res.headersSent) {
        return;
      }
      return res.status(500).json({
        error: "Error generating debug response: " + generateError.message,
        stack: generateError.stack,
        name: generateError.name,
        code: generateError.code,
        sqlMessage: generateError.sqlMessage,
      });
    }
  } catch (error) {
    console.error("Debug POST endpoint error:", error);
    if (res.headersSent) {
      return;
    }
    res.status(500).json({
      error: error.message,
      stack: error.stack,
      code: error.code || null,
      name: error.name || null
    });
  }
});
|
|
|
|
/**
 * Builds the payload for the debug endpoint: taxonomy statistics plus the
 * prompt that would be generated for `productsToUse`.
 *
 * Two SSH-tunnelled MySQL connections are opened in sequence: one to load the
 * taxonomy for the statistics, one for loadPrompt. Both are closed before the
 * function returns.
 *
 * @param {Array<object>} productsToUse - Products the prompt is built for.
 * @param {object} res - Express response. On failure this function sends the
 *   500 response itself and returns the result of that `res.json(...)` call,
 *   so callers must not treat the return value as a payload once the response
 *   has been sent.
 * @returns {Promise<object>} On success, an object with `taxonomyStats`,
 *   `basePrompt`, `sampleFullPrompt` and `promptLength`.
 */
async function generateDebugResponse(productsToUse, res) {
  let taxonomy = null;
  let mysqlConnection = null;
  let ssh = null;

  try {
    // Load taxonomy data first
    console.log("Loading taxonomy data...");
    try {
      // Setup MySQL connection via SSH tunnel
      const tunnel = await setupSshTunnel();
      ssh = tunnel.ssh;

      mysqlConnection = await mysql.createConnection({
        ...tunnel.dbConfig,
        stream: tunnel.stream
      });

      console.log("MySQL connection established successfully");

      taxonomy = await getTaxonomyData(mysqlConnection);
      console.log("Successfully loaded taxonomy data");
    } catch (taxonomyError) {
      console.error("Failed to load taxonomy data:", taxonomyError);
      return res.status(500).json({
        error: "Error fetching taxonomy data: " + taxonomyError.message,
        sqlMessage: taxonomyError.sqlMessage || null,
        sqlState: taxonomyError.sqlState || null,
        code: taxonomyError.code || null,
        errno: taxonomyError.errno || null,
        sql: taxonomyError.sql || null,
      });
    } finally {
      // Make sure we close the connection
      if (mysqlConnection) await mysqlConnection.end();
      if (ssh) ssh.end();
    }

    // Verify the taxonomy data structure
    console.log("Verifying taxonomy structure...");
    if (!taxonomy) {
      console.error("Taxonomy data is null");
      return res.status(500).json({ error: "Taxonomy data is null" });
    }

    // Check if each taxonomy component exists
    const taxonomyComponents = [
      "categories", "themes", "colors", "taxCodes", "sizeCategories",
      "suppliers", "companies", "artists", "lines", "subLines"
    ];

    const missingComponents = taxonomyComponents.filter((comp) => !taxonomy[comp]);
    if (missingComponents.length > 0) {
      console.error("Missing taxonomy components:", missingComponents);
    }

    // Summarises one taxonomy list for the log: its length and a JSON sample
    // of the first entry (optionally cut to 100 characters).
    const summarize = (list, truncate = false) => {
      const length = list?.length || 0;
      if (!(list?.length > 0)) {
        return { length, sample: null };
      }
      const json = JSON.stringify(list[0]);
      return { length, sample: truncate ? json.substring(0, 100) + "..." : json };
    };

    // Log detailed taxonomy stats for debugging
    console.log("Taxonomy data loaded with details:", {
      categories: summarize(taxonomy.categories, true),
      themes: summarize(taxonomy.themes, true),
      colors: summarize(taxonomy.colors),
      taxCodes: summarize(taxonomy.taxCodes),
      sizeCategories: summarize(taxonomy.sizeCategories),
      suppliers: summarize(taxonomy.suppliers),
      companies: summarize(taxonomy.companies),
      artists: summarize(taxonomy.artists)
    });

    // Load the prompt using the same function used by validation
    console.log("Loading prompt...");

    // Setup a new connection for loading the prompt. The tunnel and the
    // connection are created INSIDE the try so that a failure while opening
    // the MySQL connection still closes the SSH session in `finally`.
    let promptSsh = null;
    let promptConnection = null;

    try {
      const promptTunnel = await setupSshTunnel();
      promptSsh = promptTunnel.ssh;

      promptConnection = await mysql.createConnection({
        ...promptTunnel.dbConfig,
        stream: promptTunnel.stream
      });

      const prompt = await loadPrompt(promptConnection, productsToUse);
      const fullPrompt = prompt + "\n" + JSON.stringify(productsToUse);

      // Create the response with taxonomy stats
      let categoriesCount = 0;
      try {
        categoriesCount = taxonomy?.categories?.length ? countItems(taxonomy.categories) : 0;
      } catch (countError) {
        console.error("Error counting categories:", countError);
        categoriesCount = taxonomy?.categories?.length || 0; // Fallback to simple length
      }

      // Counts the taxonomy entries whose id is referenced by at least one
      // product through the given product field.
      const countUsed = (entries, field) =>
        entries?.filter(([id]) =>
          productsToUse.some((p) => Number(p[field]) === Number(id))
        )?.length || 0;

      const response = {
        taxonomyStats: taxonomy
          ? {
              categories: categoriesCount,
              themes: taxonomy.themes?.length || 0,
              colors: taxonomy.colors?.length || 0,
              taxCodes: taxonomy.taxCodes?.length || 0,
              sizeCategories: taxonomy.sizeCategories?.length || 0,
              suppliers: taxonomy.suppliers?.length || 0,
              companies: taxonomy.companies?.length || 0,
              artists: taxonomy.artists?.length || 0,
              // Add filtered counts when products are provided
              filtered: productsToUse
                ? {
                    suppliers: countUsed(taxonomy.suppliers, "supplierid"),
                    companies: countUsed(taxonomy.companies, "company"),
                    artists: countUsed(taxonomy.artists, "artist"),
                  }
                : null,
            }
          : null,
        basePrompt: prompt,
        sampleFullPrompt: fullPrompt,
        promptLength: fullPrompt.length,
      };

      console.log("Sending response with taxonomy stats:", response.taxonomyStats);
      return response;
    } finally {
      if (promptConnection) await promptConnection.end();
      if (promptSsh) promptSsh.end();
    }
  } catch (error) {
    console.error("Error generating debug response:", error);
    return res.status(500).json({
      error: error.message,
      stack: error.stack,
      sqlMessage: error.sqlMessage || null,
      sqlState: error.sqlState || null,
      code: error.code || null,
      errno: error.errno || null,
      taxonomyState: taxonomy ? "loaded" : "failed",
    });
  }
}
|
|
|
|
/**
 * Counts every node in a hierarchical list, including all nested descendants.
 *
 * Two node shapes are understood:
 *  - tuple form `[id, name, children?]`, which is what getTaxonomyData builds
 *    for categories (children live at index 2 when present);
 *  - object form `{ subcategories?: [...] }`.
 *
 * @param {Array} items - Nodes at the current level.
 * @returns {number} Total number of nodes at this level and below.
 */
function countItems(items) {
  return items.reduce((count, item) => {
    const children = Array.isArray(item) ? item[2] : item?.subcategories;
    // Only recurse into real arrays; anything else counts as a leaf.
    return count + 1 + (Array.isArray(children) ? countItems(children) : 0);
  }, 0);
}
|
|
|
|
/**
 * Reads every taxonomy list from the database and converts the rows into
 * compact tuples for use in prompts.
 *
 * @param {object} connection - Object exposing `query(sql)` that resolves to
 *   an array whose first element is the row list.
 * @returns {Promise<object>} Object with `categories` and `themes` as nested
 *   `[id, name, children?]` tuples, `colors` as `[color, name, hex]`, and
 *   `taxCodes`, `sizeCategories`, `suppliers`, `companies`, `artists`,
 *   `lines`, `subLines` as `[id, name]` tuples.
 * @throws Re-throws any query error after logging its details.
 */
async function getTaxonomyData(connection) {
  try {
    console.log("Starting taxonomy data fetch...");

    // Runs a single query, logs the row count under the given label and
    // returns the rows.
    const fetchRows = async (label, sql) => {
      const [rows] = await connection.query(sql);
      console.log(`${label} fetched:`, rows.length);
      return rows;
    };

    // Categories with hierarchy (four levels)
    const categories = await fetchRows("Categories", `
SELECT cat_id,name,NULL AS master_cat_id,1 AS level_order FROM product_categories s WHERE type=10 UNION ALL SELECT c.cat_id,c.name,c.master_cat_id,2 AS level_order FROM product_categories c JOIN product_categories s ON c.master_cat_id=s.cat_id WHERE c.type=11 AND s.type=10 UNION ALL SELECT sc.cat_id,sc.name,sc.master_cat_id,3 AS level_order FROM product_categories sc JOIN product_categories c ON sc.master_cat_id=c.cat_id JOIN product_categories s ON c.master_cat_id=s.cat_id WHERE sc.type=12 AND c.type=11 AND s.type=10 UNION ALL SELECT ssc.cat_id,ssc.name,ssc.master_cat_id,4 AS level_order FROM product_categories ssc JOIN product_categories sc ON ssc.master_cat_id=sc.cat_id JOIN product_categories c ON sc.master_cat_id=c.cat_id JOIN product_categories s ON c.master_cat_id=s.cat_id WHERE ssc.type=13 AND sc.type=12 AND c.type=11 AND s.type=10 ORDER BY level_order,cat_id;
`);

    // Themes with hierarchy (two levels)
    const themes = await fetchRows("Themes", `
SELECT t.cat_id,t.name,null as master_cat_id,1 AS level_order FROM product_categories t WHERE t.type=20 UNION ALL SELECT ts.cat_id,ts.name,ts.master_cat_id,2 AS level_order FROM product_categories ts JOIN product_categories t ON ts.master_cat_id=t.cat_id WHERE ts.type=21 AND t.type=20 ORDER BY level_order,name
`);

    const colors = await fetchRows(
      "Colors",
      `SELECT color, name, hex_color FROM product_color_list ORDER BY \`order\``
    );

    const taxCodes = await fetchRows(
      "Tax codes",
      `SELECT tax_code_id, name FROM product_tax_codes ORDER BY name`
    );

    const sizeCategories = await fetchRows(
      "Size categories",
      `SELECT cat_id, name FROM product_categories WHERE type=50 ORDER BY name`
    );

    const suppliers = await fetchRows("Suppliers", `
SELECT supplierid, companyname as name
FROM suppliers
WHERE companyname <> ''
ORDER BY companyname
`);

    // Companies are product_categories rows of type 1
    const companies = await fetchRows("Companies", `
SELECT cat_id, name
FROM product_categories
WHERE type = 1
ORDER BY name
`);

    // Artists are product_categories rows of type 40
    const artists = await fetchRows("Artists", `
SELECT cat_id, name
FROM product_categories
WHERE type = 40
ORDER BY name
`);

    // Lines are product_categories rows of type 2
    const lines = await fetchRows("Lines", `
SELECT cat_id, name
FROM product_categories
WHERE type = 2
ORDER BY name
`);

    // Sub-lines are product_categories rows of type 3
    const subLines = await fetchRows("Sub-lines", `
SELECT cat_id, name
FROM product_categories
WHERE type = 3
ORDER BY name
`);

    // Recursively nests rows: a row belongs to the current level when its
    // level_order matches and its parent id equals `parentId`.
    const buildTree = (rows, level = 1, parentId = null) => {
      const nodes = [];
      for (const row of rows) {
        if (row.level_order !== level || row.master_cat_id !== parentId) {
          continue;
        }
        const children = buildTree(rows, level + 1, row.cat_id);
        if (children.length > 0) {
          nodes.push([row.cat_id, row.name, children]);
        } else {
          nodes.push([row.cat_id, row.name]);
        }
      }
      return nodes;
    };

    // Themes only have two levels: top-level rows plus their direct children.
    const buildThemes = (rows) => {
      const nodes = [];
      for (const theme of rows) {
        if (theme.level_order !== 1) {
          continue;
        }
        const subthemes = [];
        for (const candidate of rows) {
          if (candidate.master_cat_id === theme.cat_id) {
            subthemes.push([candidate.cat_id, candidate.name]);
          }
        }
        if (subthemes.length > 0) {
          nodes.push([theme.cat_id, theme.name, subthemes]);
        } else {
          nodes.push([theme.cat_id, theme.name]);
        }
      }
      return nodes;
    };

    // Log the first raw row of a few lists to check structure
    console.log("Sample category:", categories.length > 0 ? categories[0] : "No categories");
    console.log("Sample theme:", themes.length > 0 ? themes[0] : "No themes");
    console.log("Sample color:", colors.length > 0 ? colors[0] : "No colors");

    const toIdNamePair = (row) => [row.cat_id, row.name];

    const formattedData = {
      categories: buildTree(categories),
      themes: buildThemes(themes),
      colors: colors.map((row) => [row.color, row.name, row.hex_color]),
      taxCodes: (taxCodes || []).map((row) => [row.tax_code_id, row.name]),
      sizeCategories: (sizeCategories || []).map(toIdNamePair),
      suppliers: suppliers.map((row) => [row.supplierid, row.name]),
      companies: companies.map(toIdNamePair),
      artists: artists.map(toIdNamePair),
      lines: lines.map(toIdNamePair),
      subLines: subLines.map(toIdNamePair),
    };

    // Check the formatted structure
    console.log("Formatted categories count:", formattedData.categories.length);
    console.log("Formatted themes count:", formattedData.themes.length);
    console.log("Formatted colors count:", formattedData.colors.length);

    return formattedData;
  } catch (error) {
    console.error("Error fetching taxonomy data:", error);
    console.error("Full error details:", {
      message: error.message,
      stack: error.stack,
      code: error.code,
      errno: error.errno,
      sqlMessage: error.sqlMessage,
      sqlState: error.sqlState,
      sql: error.sql
    });

    // Surface the failure to the caller rather than returning empty lists.
    throw error;
  }
}
|
|
|
|
/**
 * Builds the validation prompt: system instructions, the base prompt read
 * from `../prompts/product-validation.txt`, and a taxonomy section.
 *
 * When `productsToValidate` is given, suppliers, companies, artists, lines
 * and sub-lines are reduced to the ids referenced by those products, while
 * categories, themes, colors, tax codes and size categories are always
 * included in full. Without products, the unfiltered lists are emitted.
 *
 * @param {object} connection - Database connection passed to getTaxonomyData.
 * @param {Array<object>|null} [productsToValidate=null] - Products used to
 *   filter the taxonomy.
 * @returns {Promise<string>} The prompt text, ending with the header line
 *   after which the caller appends the product JSON.
 * @throws Re-throws file-read and taxonomy errors after logging them.
 */
async function loadPrompt(connection, productsToValidate = null) {
  try {
    const promptPath = path.join(
      __dirname,
      "..",
      "prompts",
      "product-validation.txt"
    );
    const basePrompt = await fs.readFile(promptPath, "utf8");

    // Get taxonomy data using the provided MySQL connection
    const taxonomy = await getTaxonomyData(connection);

    // Add system instructions to the prompt
    const systemInstructions = `You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone. You have meticulous attention to detail and are a master at your craft.`;

    // If we have products to validate, create a filtered prompt
    if (productsToValidate) {
      console.log("Creating filtered prompt for products:", productsToValidate);

      // Extract unique values from products for non-core attributes
      const uniqueValues = {
        supplierIds: new Set(),
        companyIds: new Set(),
        artistIds: new Set(),
        lineIds: new Set(),
        subLineIds: new Set(),
      };

      // Product field name -> id set it feeds. A Map is used instead of a
      // plain object so that product keys such as "constructor", "toString"
      // or "__proto__" (products come from request JSON) cannot match an
      // inherited property and crash on `undefined.add(...)`.
      const fieldMap = new Map([
        ["supplierid", "supplierIds"],
        ["supplier", "supplierIds"],
        ["company", "companyIds"],
        ["artist", "artistIds"],
        ["line", "lineIds"],
        ["subline", "subLineIds"],
      ]);

      // Collect any values that exist in the products
      productsToValidate.forEach((product) => {
        // Skip entries that are not objects instead of throwing.
        if (product === null || typeof product !== "object") return;

        Object.entries(product).forEach(([key, value]) => {
          if (value === undefined || value === null) return;

          const targetSet = fieldMap.get(key);
          if (targetSet) {
            uniqueValues[targetSet].add(Number(value));
          }
        });
      });

      console.log("Unique values collected:", {
        suppliers: Array.from(uniqueValues.supplierIds),
        companies: Array.from(uniqueValues.companyIds),
        artists: Array.from(uniqueValues.artistIds),
        lines: Array.from(uniqueValues.lineIds),
        subLines: Array.from(uniqueValues.subLineIds),
      });

      // Keeps only the `[id, ...]` entries whose id is in the given set.
      const onlyUsed = (entries, ids) =>
        entries.filter(([id]) => ids.has(Number(id)));

      // Create mixed taxonomy with filtered non-core data and full core data
      const mixedTaxonomy = {
        // Keep full data for core attributes
        categories: taxonomy.categories,
        themes: taxonomy.themes,
        colors: taxonomy.colors,
        taxCodes: taxonomy.taxCodes,
        sizeCategories: taxonomy.sizeCategories,
        // For non-core data, only include items that are actually used
        suppliers: onlyUsed(taxonomy.suppliers, uniqueValues.supplierIds),
        companies: onlyUsed(taxonomy.companies, uniqueValues.companyIds),
        artists: onlyUsed(taxonomy.artists, uniqueValues.artistIds),
        lines: onlyUsed(taxonomy.lines, uniqueValues.lineIds),
        subLines: onlyUsed(taxonomy.subLines, uniqueValues.subLineIds),
      };

      console.log("Filtered taxonomy counts:", {
        suppliers: mixedTaxonomy.suppliers.length,
        companies: mixedTaxonomy.companies.length,
        artists: mixedTaxonomy.artists.length,
        lines: mixedTaxonomy.lines.length,
        subLines: mixedTaxonomy.subLines.length,
      });

      // Renders an optional "<title> Used In This Data" section; empty lists
      // produce no text at all.
      const usedSection = (title, entries) =>
        entries.length
          ? `\n\n${title} Used In This Data:\n${JSON.stringify(entries)}`
          : "";

      // Format taxonomy data for the prompt, only including sections with values
      const taxonomySection = `
All Available Categories:
${JSON.stringify(mixedTaxonomy.categories)}

All Available Themes:
${JSON.stringify(mixedTaxonomy.themes)}

All Available Colors:
${JSON.stringify(mixedTaxonomy.colors)}

All Available Tax Codes:
${JSON.stringify(mixedTaxonomy.taxCodes)}

All Available Size Categories:
${JSON.stringify(mixedTaxonomy.sizeCategories)}${usedSection(
        "Suppliers",
        mixedTaxonomy.suppliers
      )}${usedSection("Companies", mixedTaxonomy.companies)}${usedSection(
        "Artists",
        mixedTaxonomy.artists
      )}${usedSection("Lines", mixedTaxonomy.lines)}${usedSection(
        "Sub-Lines",
        mixedTaxonomy.subLines
      )}

----------Here is the product data to validate----------`;

      // Return the filtered prompt
      return systemInstructions + basePrompt + "\n" + taxonomySection;
    }

    // Generate the full unfiltered prompt
    const taxonomySection = `
Available Categories:
${JSON.stringify(taxonomy.categories)}

Available Themes:
${JSON.stringify(taxonomy.themes)}

Available Colors:
${JSON.stringify(taxonomy.colors)}

Available Tax Codes:
${JSON.stringify(taxonomy.taxCodes)}

Available Size Categories:
${JSON.stringify(taxonomy.sizeCategories)}

Available Suppliers:
${JSON.stringify(taxonomy.suppliers)}

Available Companies:
${JSON.stringify(taxonomy.companies)}

Available Artists:
${JSON.stringify(taxonomy.artists)}

Here is the product data to validate:`;

    return systemInstructions + basePrompt + "\n" + taxonomySection;
  } catch (error) {
    console.error("Error loading prompt:", error);
    throw error; // Re-throw to be handled by the calling function
  }
}
|
|
|
|
/**
 * POST /validate — sends the products plus the taxonomy-filtered prompt to
 * OpenAI and returns the model's JSON answer together with a per-field change
 * list and performance metrics.
 *
 * Request body: `{ products: Array<object> }` (non-empty).
 * Success response: `{ success: true, changeDetails, performanceMetrics,
 * ...aiResponse }`. Recording metrics is best-effort and never prevents the
 * response from being sent.
 */
router.post("/validate", async (req, res) => {
  try {
    const { products } = req.body;
    const startTime = new Date(); // Track start time for performance metrics

    console.log("🔍 Received products for validation:", {
      isArray: Array.isArray(products),
      length: products?.length,
      firstProduct: products?.[0],
      lastProduct: products?.[products?.length - 1],
    });

    if (!Array.isArray(products)) {
      console.error("❌ Invalid input: products is not an array");
      return res.status(400).json({ error: "Products must be an array" });
    }

    if (products.length === 0) {
      console.error("❌ Invalid input: products array is empty");
      return res.status(400).json({ error: "Products array cannot be empty" });
    }

    let ssh = null;
    let connection = null;
    let promptLength = 0; // Track prompt length for performance metrics

    try {
      // Setup MySQL connection via SSH tunnel
      console.log("🔄 Setting up connection to production database...");
      const tunnel = await setupSshTunnel();
      ssh = tunnel.ssh;

      connection = await mysql.createConnection({
        ...tunnel.dbConfig,
        stream: tunnel.stream
      });

      console.log("🔄 MySQL connection established successfully");

      // Load the prompt with the products data to filter taxonomy
      console.log("🔄 Loading prompt with filtered taxonomy...");
      const prompt = await loadPrompt(connection, products);
      const fullPrompt = prompt + "\n" + JSON.stringify(products);
      promptLength = fullPrompt.length; // Store prompt length for performance metrics
      console.log("📝 Generated prompt length:", promptLength);

      console.log("🤖 Sending request to OpenAI...");
      const completion = await openai.chat.completions.create({
        model: "gpt-4o",
        messages: [
          {
            role: "user",
            content: fullPrompt,
          },
        ],
        temperature: 0.2,
        response_format: { type: "json_object" },
      });

      console.log("✅ Received response from OpenAI");
      const rawResponse = completion.choices[0].message.content;
      console.log("📄 Raw AI response length:", rawResponse.length);

      try {
        const aiResponse = JSON.parse(rawResponse);
        console.log(
          "🔄 Parsed AI response with keys:",
          Object.keys(aiResponse)
        );

        // Create a detailed comparison between original and corrected data
        const changeDetails = [];

        // Compare original and corrected data
        if (aiResponse.correctedData) {
          console.log("📊 Changes summary:");
          products.forEach((original, index) => {
            const corrected = aiResponse.correctedData[index];
            if (!corrected) return;

            const productChanges = {
              productIndex: index,
              title: original.title || `Product ${index + 1}`,
              changes: []
            };

            // A field counts as changed when its JSON form differs.
            const changes = Object.keys(corrected).filter(
              (key) =>
                JSON.stringify(original[key]) !==
                JSON.stringify(corrected[key])
            );

            if (changes.length === 0) return;

            console.log(`\nProduct ${index + 1} changes:`);
            changes.forEach((key) => {
              console.log(`  ${key}:`);
              console.log(
                `    - Original:  ${JSON.stringify(original[key])}`
              );
              console.log(
                `    - Corrected: ${JSON.stringify(corrected[key])}`
              );

              // Add to our detailed changes array
              productChanges.changes.push({
                field: key,
                original: original[key],
                corrected: corrected[key]
              });
            });

            // Only add products that have changes
            if (productChanges.changes.length > 0) {
              changeDetails.push(productChanges);
            }
          });
        }

        // Record performance metrics after successful validation
        const endTime = new Date();
        const performanceMetrics = {
          promptLength,
          productCount: products.length,
          processingTimeSeconds: (endTime - startTime) / 1000
        };

        try {
          // Use the local PostgreSQL pool from the app instead of the MySQL connection
          const pool = req.app.locals.pool;

          if (!pool) {
            // Metrics are optional. Do NOT return here: the validation result
            // still has to be sent to the client below.
            console.warn("⚠️ Local database pool not available for recording metrics");
          } else {
            try {
              // Insert performance data into the local PostgreSQL database
              await pool.query(
                `INSERT INTO ai_validation_performance
(prompt_length, product_count, start_time, end_time, duration_seconds)
VALUES ($1, $2, $3, $4, $5)`,
                [
                  promptLength,
                  products.length,
                  startTime,
                  endTime,
                  (endTime - startTime) / 1000
                ]
              );

              console.log("📊 Performance metrics inserted into database");

              // Query the average processing rate across all recorded runs
              try {
                const rateResults = await pool.query(
                  `SELECT
AVG(duration_seconds / prompt_length) as avg_rate_per_char,
COUNT(*) as sample_count,
AVG(duration_seconds) as avg_duration
FROM ai_validation_performance`
                );

                const rateRow = rateResults.rows && rateResults.rows[0];

                if (rateRow && rateRow.avg_rate_per_char) {
                  const rate = rateRow.avg_rate_per_char;
                  performanceMetrics.avgRate = rate;
                  performanceMetrics.estimatedSeconds = Math.round(rate * promptLength);
                  performanceMetrics.sampleCount = rateRow.sample_count;
                  performanceMetrics.calculationMethod = "rate-based";
                }

                console.log("📊 Performance metrics with rate calculation:", performanceMetrics);
              } catch (queryError) {
                console.error("⚠️ Failed to query performance metrics:", queryError);
              }
            } catch (insertError) {
              console.error("⚠️ Failed to insert performance metrics:", insertError);
              // Check if table doesn't exist and log a more helpful message
              if (insertError.code === '42P01') {
                console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script.");
              }
            }
          }
        } catch (metricError) {
          // Don't fail the request if metrics recording fails
          console.error("⚠️ Failed to record performance metrics:", metricError);
        }

        // Include performance metrics in the response. The AI response is
        // spread last, so its keys take precedence over the ones set here.
        res.json({
          success: true,
          changeDetails: changeDetails,
          performanceMetrics: performanceMetrics,
          ...aiResponse,
        });
      } catch (parseError) {
        console.error("❌ Error parsing AI response:", parseError);
        console.error("Raw response that failed to parse:", rawResponse);
        res.status(500).json({
          success: false,
          error: "Error parsing AI response: " + parseError.message,
        });
      }
    } catch (openaiError) {
      console.error("❌ OpenAI API Error:", openaiError);
      res.status(500).json({
        success: false,
        error: "OpenAI API Error: " + openaiError.message,
      });
    } finally {
      // Clean up database connection and SSH tunnel
      if (connection) await connection.end();
      if (ssh) ssh.end();
    }
  } catch (error) {
    console.error("❌ AI Validation Error:", error);
    console.error("Error details:", {
      name: error.name,
      message: error.message,
      stack: error.stack,
    });
    res.status(500).json({
      success: false,
      error: error.message || "Error during AI validation",
    });
  }
});
|
|
|
|
/**
 * GET /test-taxonomy — runs three small sample queries (categories, themes,
 * colors) against the tunnelled MySQL database and reports, per query,
 * whether it succeeded, how many rows came back, and the first row.
 * A failing query is reported in its own result entry and does not stop the
 * remaining queries.
 */
router.get("/test-taxonomy", async (req, res) => {
  try {
    console.log("Test taxonomy endpoint called");

    let ssh = null;
    let connection = null;

    try {
      // Setup MySQL connection via SSH tunnel
      const tunnel = await setupSshTunnel();
      ssh = tunnel.ssh;

      connection = await mysql.createConnection({
        ...tunnel.dbConfig,
        stream: tunnel.stream
      });

      console.log("MySQL connection established successfully for test");

      // Result key -> sample query, executed in this order.
      const probes = [
        ["categories", `
SELECT cat_id, name FROM product_categories WHERE type=10 LIMIT 5
`],
        ["themes", `
SELECT cat_id, name FROM product_categories WHERE type=20 LIMIT 5
`],
        ["colors", `
SELECT color, name, hex_color FROM product_color_list ORDER BY \`order\` LIMIT 5
`],
      ];

      const results = {};

      for (const [key, sql] of probes) {
        try {
          const [rows] = await connection.query(sql);
          results[key] = {
            success: true,
            count: rows.length,
            sample: rows.length > 0 ? rows[0] : null
          };
        } catch (error) {
          results[key] = {
            success: false,
            error: error.message,
            sqlMessage: error.sqlMessage
          };
        }
      }

      return res.json({
        message: "Test taxonomy queries executed",
        results: results,
        timestamp: new Date().toISOString()
      });
    } finally {
      // Always release the connection and the tunnel.
      if (connection) await connection.end();
      if (ssh) ssh.end();
    }
  } catch (error) {
    console.error("Test taxonomy endpoint error:", error);
    return res.status(500).json({
      error: error.message,
      stack: error.stack
    });
  }
});
|
|
|
|
module.exports = router;
|