Fix line/subline regressions, add in AI validation tracking and improve AI results dialog

This commit is contained in:
2025-02-26 00:38:17 -05:00
parent 2df5428712
commit 6b101a91f6
5 changed files with 758 additions and 85 deletions

View File

@@ -23,6 +23,20 @@ CREATE TABLE IF NOT EXISTS templates (
UNIQUE(company, product_type)
);
-- AI Validation Performance Tracking
-- One row per recorded AI validation run. Writers supply prompt_length,
-- product_count, start_time and end_time; id, duration_seconds and created_at
-- are filled in by the database.
CREATE TABLE IF NOT EXISTS ai_validation_performance (
id SERIAL PRIMARY KEY,
-- Length of the full prompt string sent for the run
prompt_length INTEGER NOT NULL,
-- Number of products included in the run
product_count INTEGER NOT NULL,
start_time TIMESTAMP WITH TIME ZONE NOT NULL,
end_time TIMESTAMP WITH TIME ZONE NOT NULL,
-- Derived column: elapsed seconds between start_time and end_time, kept to two
-- decimal places. GENERATED ALWAYS ... STORED means it is computed on write and
-- must not be listed with an explicit value in INSERT/UPDATE statements.
duration_seconds DECIMAL(10,2) GENERATED ALWAYS AS (EXTRACT(EPOCH FROM (end_time - start_time))) STORED,
-- Falls back to CURRENT_TIMESTAMP when the writer does not provide a value
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Create index on prompt_length for efficient querying
-- The time-estimate queries filter rows by a prompt_length range
-- (BETWEEN 80% and 120% of a given length) before averaging duration_seconds.
CREATE INDEX IF NOT EXISTS idx_ai_validation_prompt_length ON ai_validation_performance(prompt_length);
-- Function to update the updated_at timestamp
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$

View File

@@ -7,6 +7,8 @@ Your response should be a JSON object with the following structure:
"warnings": [] // Array of strings with warnings or suggestions for manual review (see below for details)
}
IMPORTANT: For all fields that use IDs (categories, supplier, company, line, subline, ship_restrictions, tax_cat, artist, themes, etc.), you MUST return the ID values, not the display names. The system will handle converting IDs to display names.
Using the provided guidelines, focus on:
1. Correcting typos and any incorrect spelling or grammar
2. Standardizing product names
@@ -93,7 +95,7 @@ Instructions: Always return a valid numerical tax code ID from the Available Tax
Fields: size_cat
Changes: Allowed to correct obvious errors or inconsistencies or to add missing values
Required: Return if present in the original data or if not present and applicable. Do not return if not applicable (e.g. if no size categories apply based on what you know about the product).
Instructions: If present or if applicable, return one valid numerical size category ID from the Available Size Categories array below. Give preference to the value provided, but correct it if another value is more accurate. A value is not required if none of the size categories apply, but it's important to include if one clearly applies, such as if the name contains 12x12, 6x8, 2oz, etc.
Instructions: If present or if applicable, return one valid numerical size category ID from the Available Size Categories array below. Give preference to the value provided, but correct it if another value is more accurate. If the product name contains a match for one of the size categories (such as 12x12, 6x6, 2oz, etc.) you MUST return that size category with the results. A value is not required if none of the size categories apply.
Fields: themes
Changes: Allowed to correct obvious errors or inconsistencies or to add missing values

View File

@@ -98,7 +98,51 @@ router.post("/debug", async (req, res) => {
});
try {
return await generateDebugResponse(cleanedProducts, res);
const debugResponse = await generateDebugResponse(cleanedProducts, res);
// Get estimated processing time based on prompt length
if (debugResponse && debugResponse.promptLength) {
try {
// Use the pool from the app
const pool = req.app.locals.pool;
if (!pool) {
console.warn("⚠️ Local database pool not available for time estimates");
return;
}
try {
const avgTimeResults = await pool.query(
`SELECT AVG(duration_seconds) as avg_duration,
COUNT(*) as sample_count
FROM ai_validation_performance
WHERE prompt_length BETWEEN $1 * 0.8 AND $1 * 1.2`,
[debugResponse.promptLength]
);
// Add estimated time to the response
if (avgTimeResults.rows && avgTimeResults.rows[0]) {
debugResponse.estimatedProcessingTime = {
seconds: avgTimeResults.rows[0].avg_duration || null,
sampleCount: avgTimeResults.rows[0].sample_count || 0
};
console.log("📊 Retrieved processing time estimate:", debugResponse.estimatedProcessingTime);
} else {
console.log("📊 No processing time estimates available for prompt length:", debugResponse.promptLength);
}
} catch (queryError) {
console.error("⚠️ Failed to query performance metrics:", queryError);
// Check if table doesn't exist and log a more helpful message
if (queryError.code === '42P01') {
console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script.");
}
}
} catch (timeEstimateError) {
console.error("Error getting time estimate:", timeEstimateError);
// Don't fail the request if time estimate fails
}
}
return res.json(debugResponse);
} catch (generateError) {
console.error("Error generating debug response:", generateError);
return res.status(500).json({
@@ -271,7 +315,7 @@ async function generateDebugResponse(productsToUse, res) {
};
console.log("Sending response with taxonomy stats:", response.taxonomyStats);
return res.json(response);
return response;
} finally {
if (promptConnection) await promptConnection.end();
if (promptTunnel.ssh) promptTunnel.ssh.end();
@@ -463,9 +507,7 @@ async function loadPrompt(connection, productsToValidate = null) {
const taxonomy = await getTaxonomyData(connection);
// Add system instructions to the prompt
const systemInstructions = `You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone.
`;
const systemInstructions = `You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone. You have meticulous attention to detail and are a master at your craft.`;
// If we have products to validate, create a filtered prompt
if (productsToValidate) {
@@ -634,6 +676,7 @@ Here is the product data to validate:`;
router.post("/validate", async (req, res) => {
try {
const { products } = req.body;
const startTime = new Date(); // Track start time for performance metrics
console.log("🔍 Received products for validation:", {
isArray: Array.isArray(products),
@@ -654,6 +697,7 @@ router.post("/validate", async (req, res) => {
let ssh = null;
let connection = null;
let promptLength = 0; // Track prompt length for performance metrics
try {
// Setup MySQL connection via SSH tunnel
@@ -672,7 +716,8 @@ router.post("/validate", async (req, res) => {
console.log("🔄 Loading prompt with filtered taxonomy...");
const prompt = await loadPrompt(connection, products);
const fullPrompt = prompt + "\n" + JSON.stringify(products);
console.log("📝 Generated prompt length:", fullPrompt.length);
promptLength = fullPrompt.length; // Store prompt length for performance metrics
console.log("📝 Generated prompt length:", promptLength);
console.log("🤖 Sending request to OpenAI...");
const completion = await openai.chat.completions.create({
@@ -698,17 +743,27 @@ router.post("/validate", async (req, res) => {
Object.keys(aiResponse)
);
// Create a detailed comparison between original and corrected data
const changeDetails = [];
// Compare original and corrected data
if (aiResponse.correctedData) {
console.log("📊 Changes summary:");
products.forEach((original, index) => {
const corrected = aiResponse.correctedData[index];
if (corrected) {
const productChanges = {
productIndex: index,
title: original.title || `Product ${index + 1}`,
changes: []
};
const changes = Object.keys(corrected).filter(
(key) =>
JSON.stringify(original[key]) !==
JSON.stringify(corrected[key])
);
if (changes.length > 0) {
console.log(`\nProduct ${index + 1} changes:`);
changes.forEach((key) => {
@@ -719,14 +774,87 @@ router.post("/validate", async (req, res) => {
console.log(
` - Corrected: ${JSON.stringify(corrected[key])}`
);
// Add to our detailed changes array
productChanges.changes.push({
field: key,
original: original[key],
corrected: corrected[key]
});
});
// Only add products that have changes
if (productChanges.changes.length > 0) {
changeDetails.push(productChanges);
}
}
}
});
}
// Record performance metrics after successful validation
const endTime = new Date();
let performanceMetrics = {
promptLength,
productCount: products.length,
processingTimeSeconds: (endTime - startTime) / 1000
};
try {
// Use the local PostgreSQL pool from the app instead of the MySQL connection
const pool = req.app.locals.pool;
if (!pool) {
console.warn("⚠️ Local database pool not available for recording metrics");
return;
}
try {
// Insert performance data into the local PostgreSQL database
await pool.query(
`INSERT INTO ai_validation_performance
(prompt_length, product_count, start_time, end_time)
VALUES ($1, $2, $3, $4)`,
[promptLength, products.length, startTime, endTime]
);
console.log("📊 Performance metrics inserted into database");
// Query for average processing time based on similar prompt lengths
try {
const avgTimeResults = await pool.query(
`SELECT AVG(duration_seconds) as avg_duration,
COUNT(*) as sample_count
FROM ai_validation_performance
WHERE prompt_length BETWEEN $1 * 0.8 AND $1 * 1.2`,
[promptLength]
);
if (avgTimeResults.rows && avgTimeResults.rows[0]) {
performanceMetrics.avgDuration = avgTimeResults.rows[0].avg_duration;
performanceMetrics.sampleCount = avgTimeResults.rows[0].sample_count;
}
console.log("📊 Performance metrics retrieved:", performanceMetrics);
} catch (queryError) {
console.error("⚠️ Failed to query performance metrics:", queryError);
}
} catch (insertError) {
console.error("⚠️ Failed to insert performance metrics:", insertError);
// Check if table doesn't exist and log a more helpful message
if (insertError.code === '42P01') {
console.error("Table 'ai_validation_performance' does not exist. Make sure to run the setup-schema.sql script.");
}
}
} catch (metricError) {
// Don't fail the request if metrics recording fails
console.error("⚠️ Failed to record performance metrics:", metricError);
}
// Include performance metrics in the response
res.json({
success: true,
changeDetails: changeDetails,
performanceMetrics,
...aiResponse,
});
} catch (parseError) {