Lots of new AI tasks tweaks and fixes

This commit is contained in:
2026-01-20 13:15:10 -05:00
parent 167c13c572
commit 1dcb47cfc5
17 changed files with 1202 additions and 264 deletions

View File

@@ -409,8 +409,12 @@ router.post('/validate/sanity-check', async (req, res) => {
return res.status(400).json({ error: 'Products array is required' });
}
// Get pool from app.locals (set by server.js)
const pool = req.app.locals.pool;
const result = await aiService.runTask(aiService.TASK_IDS.SANITY_CHECK, {
products
products,
pool
});
if (!result.success) {

View File

@@ -327,7 +327,8 @@ async function runTask(taskId, payload = {}) {
...payload,
// Inject dependencies tasks may need
provider: groqProvider,
pool: appPool,
// Use pool from payload if provided (from route), fall back to stored appPool
pool: payload.pool || appPool,
logger
});
}

View File

@@ -5,6 +5,33 @@
* System and general prompts are loaded from the database.
*/
/**
 * Sanitize an issue string from an AI response.
 *
 * The model sometimes returns issue strings containing leftover JSON escape
 * sequences (for example a dangling backslash, or `\"` / `\",` / `\")` where
 * the closing of a JSON string leaked into the value). This strips such
 * artifacts from the end of the string and unescapes quotes elsewhere.
 *
 * @param {string} issue - Raw issue string
 * @returns {string} Cleaned issue string; '' for non-string or empty input
 */
function sanitizeIssue(issue) {
  if (typeof issue !== 'string') return '';

  // One trailing artifact: either a run of backslashes (optionally followed by
  // a quote, comma and/or closing paren), or a run of commas/whitespace.
  const trailingArtifact = /(?:\\+(?:",?\)?)?|[,\s]+)$/;

  // Artifacts can be stacked (e.g. `\" ` or `\\"`), and removing one may expose
  // another, so strip repeatedly until the string stops changing. Every
  // successful replacement shortens the string, so the loop always terminates.
  let cleaned = issue;
  let previous;
  do {
    previous = cleaned;
    cleaned = cleaned.replace(trailingArtifact, '');
  } while (cleaned !== previous);

  // Collapse any run of backslashes before a quote into a plain quote, so
  // single-, double- and deeper-escaped quotes are all normalised.
  return cleaned.replace(/\\+"/g, '"').trim();
}
/**
* Build the user prompt for description validation
* Combines database prompts with product data
@@ -50,13 +77,17 @@ function buildDescriptionUserPrompt(product, prompts) {
// Add response format instructions
parts.push('');
parts.push('If the description is empty or very short, suggest a complete description based on the product name.');
parts.push('CRITICAL RULES:');
parts.push('- If isValid is false, you MUST provide a suggestion with the improved description');
parts.push('- If there are ANY issues, isValid MUST be false and suggestion MUST contain the corrected text');
parts.push('- If the description is empty or very short, write a complete description based on the product name');
parts.push('- Only set isValid to true if there are ZERO issues and the description needs no changes');
parts.push('');
parts.push('RESPOND WITH JSON:');
parts.push(JSON.stringify({
isValid: 'true/false',
suggestion: 'improved description if changes needed, or null if valid',
issues: ['issue 1', 'issue 2 (empty array if valid)']
isValid: 'true if perfect, false if ANY changes needed',
suggestion: 'REQUIRED when isValid is false - the complete improved description',
issues: ['list each problem found (empty array only if isValid is true)']
}, null, 2));
return parts.join('\n');
@@ -72,11 +103,35 @@ function buildDescriptionUserPrompt(product, prompts) {
function parseDescriptionResponse(parsed, content) {
// If we got valid parsed JSON, use it
if (parsed && typeof parsed.isValid === 'boolean') {
return {
isValid: parsed.isValid,
suggestion: parsed.suggestion || null,
issues: Array.isArray(parsed.issues) ? parsed.issues : []
};
// Sanitize issues - AI sometimes returns malformed escape sequences
const rawIssues = Array.isArray(parsed.issues) ? parsed.issues : [];
const issues = rawIssues
.map(sanitizeIssue)
.filter(issue => issue.length > 0);
const suggestion = parsed.suggestion || null;
// IMPORTANT: LLMs sometimes return contradictory data (isValid: true with issues).
// If there are issues, treat as invalid regardless of what the AI said.
// Also if there's a suggestion, the AI thought something needed to change.
const isValid = parsed.isValid && issues.length === 0 && !suggestion;
return { isValid, suggestion, issues };
}
// Handle case where isValid is a string "true"/"false" instead of boolean
if (parsed && typeof parsed.isValid === 'string') {
const rawIssues = Array.isArray(parsed.issues) ? parsed.issues : [];
const issues = rawIssues
.map(sanitizeIssue)
.filter(issue => issue.length > 0);
const suggestion = parsed.suggestion || null;
const rawIsValid = parsed.isValid.toLowerCase() !== 'false';
// Same defensive logic: if there are issues, it's not valid
const isValid = rawIsValid && issues.length === 0 && !suggestion;
return { isValid, suggestion, issues };
}
// Try to extract from content if parsing failed
@@ -100,11 +155,16 @@ function parseDescriptionResponse(parsed, content) {
const issuesContent = issuesMatch[1];
const issueStrings = issuesContent.match(/"([^"]+)"/g);
if (issueStrings) {
issues = issueStrings.map(s => s.replace(/"/g, ''));
issues = issueStrings
.map(s => sanitizeIssue(s.replace(/"/g, '')))
.filter(issue => issue.length > 0);
}
}
return { isValid, suggestion, issues };
// Same logic: if there are issues, it's not valid
const finalIsValid = isValid && issues.length === 0 && !suggestion;
return { isValid: finalIsValid, suggestion, issues };
} catch {
// Default to valid if we can't parse anything
return { isValid: true, suggestion: null, issues: [] };

View File

@@ -5,6 +5,33 @@
* System and general prompts are loaded from the database.
*/
/**
 * Sanitize an issue string from an AI response.
 *
 * The model sometimes returns issue strings containing leftover JSON escape
 * sequences (for example a dangling backslash, or `\"` / `\",` / `\")` where
 * the closing of a JSON string leaked into the value). This strips such
 * artifacts from the end of the string and unescapes quotes elsewhere.
 *
 * @param {string} issue - Raw issue string
 * @returns {string} Cleaned issue string; '' for non-string or empty input
 */
function sanitizeIssue(issue) {
  if (typeof issue !== 'string') return '';

  // One trailing artifact: either a run of backslashes (optionally followed by
  // a quote, comma and/or closing paren), or a run of commas/whitespace.
  const trailingArtifact = /(?:\\+(?:",?\)?)?|[,\s]+)$/;

  // Artifacts can be stacked (e.g. `\" ` or `\\"`), and removing one may expose
  // another, so strip repeatedly until the string stops changing. Every
  // successful replacement shortens the string, so the loop always terminates.
  let cleaned = issue;
  let previous;
  do {
    previous = cleaned;
    cleaned = cleaned.replace(trailingArtifact, '');
  } while (cleaned !== previous);

  // Collapse any run of backslashes before a quote into a plain quote, so
  // single-, double- and deeper-escaped quotes are all normalised.
  return cleaned.replace(/\\+"/g, '"').trim();
}
/**
* Build the user prompt for name validation
* Combines database prompts with product data
@@ -13,7 +40,9 @@
* @param {string} product.name - Current product name
* @param {string} [product.company_name] - Company name
* @param {string} [product.line_name] - Product line name
* @param {string} [product.subline_name] - Product subline name
* @param {string} [product.description] - Product description (for context)
* @param {string[]} [product.siblingNames] - Names of other products in the same line
* @param {Object} prompts - Prompts loaded from database
* @param {string} prompts.general - General naming conventions
* @param {string} [prompts.companySpecific] - Company-specific rules
@@ -40,11 +69,32 @@ function buildNameUserPrompt(product, prompts) {
parts.push(`NAME: "${product.name || ''}"`);
parts.push(`COMPANY: ${product.company_name || 'Unknown'}`);
parts.push(`LINE: ${product.line_name || 'None'}`);
if (product.subline_name) {
parts.push(`SUBLINE: ${product.subline_name}`);
}
if (product.description) {
parts.push(`DESCRIPTION (for context): ${product.description.substring(0, 200)}`);
}
// Add sibling context for naming decisions
if (product.siblingNames && product.siblingNames.length > 0) {
parts.push('');
parts.push(`OTHER PRODUCTS IN THIS LINE (${product.siblingNames.length + 1} total including this one):`);
product.siblingNames.forEach(name => {
parts.push(`- ${name}`);
});
parts.push('');
parts.push('Use this context to determine:');
parts.push('- If this product needs a differentiator (multiple similar products exist)');
parts.push('- If naming is consistent with sibling products');
parts.push('- Which naming pattern is appropriate (single vs multiple products in line)');
} else if (product.line_name) {
parts.push('');
parts.push('This appears to be the ONLY product in this line (no siblings in current batch).');
parts.push('Use the single-product naming pattern: [Line Name] [Product Name] - [Company]');
}
// Add response format instructions
parts.push('');
parts.push('RESPOND WITH JSON:');
@@ -65,24 +115,62 @@ function buildNameUserPrompt(product, prompts) {
* @returns {Object}
*/
function parseNameResponse(parsed, content) {
// Debug: Log what we're trying to parse
console.log('[parseNameResponse] Input:', {
hasParsed: !!parsed,
parsedIsValid: parsed?.isValid,
parsedType: typeof parsed?.isValid,
contentPreview: content?.substring(0, 3000)
});
// If we got valid parsed JSON, use it
if (parsed && typeof parsed.isValid === 'boolean') {
return {
isValid: parsed.isValid,
suggestion: parsed.suggestion || null,
issues: Array.isArray(parsed.issues) ? parsed.issues : []
};
// Sanitize issues - AI sometimes returns malformed escape sequences
const rawIssues = Array.isArray(parsed.issues) ? parsed.issues : [];
const issues = rawIssues
.map(sanitizeIssue)
.filter(issue => issue.length > 0);
const suggestion = parsed.suggestion || null;
// IMPORTANT: LLMs sometimes return contradictory data (isValid: true with issues).
// If there are issues, treat as invalid regardless of what the AI said.
const isValid = parsed.isValid && issues.length === 0 && !suggestion;
return { isValid, suggestion, issues };
}
// Handle case where isValid is a string "true"/"false" instead of boolean
if (parsed && typeof parsed.isValid === 'string') {
const rawIssues = Array.isArray(parsed.issues) ? parsed.issues : [];
const issues = rawIssues
.map(sanitizeIssue)
.filter(issue => issue.length > 0);
const suggestion = parsed.suggestion || null;
const rawIsValid = parsed.isValid.toLowerCase() !== 'false';
// Same defensive logic: if there are issues, it's not valid
const isValid = rawIsValid && issues.length === 0 && !suggestion;
console.log('[parseNameResponse] Parsed isValid as string:', parsed.isValid, '→', isValid);
return { isValid, suggestion, issues };
}
// Try to extract from content if parsing failed
try {
// Look for isValid pattern
const isValidMatch = content.match(/"isValid"\s*:\s*(true|false)/i);
// Look for isValid pattern - handle both boolean and quoted string
// Matches: "isValid": true, "isValid": false, "isValid": "true", "isValid": "false"
const isValidMatch = content.match(/"isValid"\s*:\s*"?(true|false)"?/i);
const isValid = isValidMatch ? isValidMatch[1].toLowerCase() === 'true' : true;
// Look for suggestion
const suggestionMatch = content.match(/"suggestion"\s*:\s*"([^"]+)"/);
const suggestion = suggestionMatch ? suggestionMatch[1] : null;
console.log('[parseNameResponse] Regex extraction:', {
isValidMatch: isValidMatch?.[0],
isValidValue: isValidMatch?.[1],
resultIsValid: isValid
});
// Look for suggestion - handle escaped quotes and null
const suggestionMatch = content.match(/"suggestion"\s*:\s*(?:"([^"\\]*(?:\\.[^"\\]*)*)"|null)/);
const suggestion = suggestionMatch ? (suggestionMatch[1] || null) : null;
// Look for issues array
const issuesMatch = content.match(/"issues"\s*:\s*\[([\s\S]*?)\]/);
@@ -91,11 +179,16 @@ function parseNameResponse(parsed, content) {
const issuesContent = issuesMatch[1];
const issueStrings = issuesContent.match(/"([^"]+)"/g);
if (issueStrings) {
issues = issueStrings.map(s => s.replace(/"/g, ''));
issues = issueStrings
.map(s => sanitizeIssue(s.replace(/"/g, '')))
.filter(issue => issue.length > 0);
}
}
return { isValid, suggestion, issues };
// Same defensive logic: if there are issues, it's not valid
const finalIsValid = isValid && issues.length === 0 && !suggestion;
return { isValid: finalIsValid, suggestion, issues };
} catch {
// Default to valid if we can't parse anything
return { isValid: true, suggestion: null, issues: [] };

View File

@@ -63,8 +63,33 @@ class GroqProvider {
body.response_format = { type: 'json_object' };
}
// Debug: Log request being sent
console.log('[Groq] Request:', {
model: body.model,
temperature: body.temperature,
maxTokens: body.max_completion_tokens,
hasResponseFormat: !!body.response_format,
messageCount: body.messages?.length,
systemPromptLength: body.messages?.[0]?.content?.length,
userPromptLength: body.messages?.[1]?.content?.length
});
const response = await this._makeRequest('chat/completions', body, timeoutMs);
// Debug: Log raw response structure
console.log('[Groq] Raw response:', {
hasChoices: !!response.choices,
choicesLength: response.choices?.length,
firstChoice: response.choices?.[0] ? {
finishReason: response.choices[0].finish_reason,
hasMessage: !!response.choices[0].message,
contentLength: response.choices[0].message?.content?.length,
contentPreview: response.choices[0].message?.content?.substring(0, 200)
} : null,
usage: response.usage,
model: response.model
});
const content = response.choices?.[0]?.message?.content || '';
const usage = response.usage || {};

View File

@@ -89,16 +89,25 @@ function createDescriptionValidationTask() {
],
model: MODELS.LARGE, // openai/gpt-oss-120b - better for content analysis
temperature: 0.3, // Slightly higher for creative suggestions
maxTokens: 500, // More tokens for description suggestions
maxTokens: 2000, // Reasoning models need extra tokens for thinking
responseFormat: { type: 'json_object' }
});
// Log full raw response for debugging
log.info('[DescriptionValidation] Raw AI response:', {
parsed: response.parsed,
content: response.content,
contentLength: response.content?.length
});
// Parse the response
result = parseDescriptionResponse(response.parsed, response.content);
} catch (jsonError) {
// If JSON mode failed, check if we have failedGeneration to parse
if (jsonError.failedGeneration) {
log.warn('[DescriptionValidation] JSON mode failed, attempting to parse failed_generation');
log.warn('[DescriptionValidation] JSON mode failed, attempting to parse failed_generation:', {
failedGeneration: jsonError.failedGeneration
});
result = parseDescriptionResponse(null, jsonError.failedGeneration);
response = { latencyMs: 0, usage: {}, model: MODELS.LARGE };
} else {
@@ -111,9 +120,14 @@ function createDescriptionValidationTask() {
],
model: MODELS.LARGE,
temperature: 0.3,
maxTokens: 500
maxTokens: 2000 // Reasoning models need extra tokens for thinking
// No responseFormat - let the model respond freely
});
log.info('[DescriptionValidation] Raw AI response (no JSON mode):', {
parsed: response.parsed,
content: response.content,
contentLength: response.content?.length
});
result = parseDescriptionResponse(response.parsed, response.content);
}
}

View File

@@ -71,12 +71,26 @@ function createNameValidationTask() {
const companyKey = product.company_id || product.company_name || product.company;
const prompts = await loadNameValidationPrompts(pool, companyKey);
// Debug: Log loaded prompts
log.info('[NameValidation] Loaded prompts:', {
hasSystem: !!prompts.system,
systemLength: prompts.system?.length || 0,
hasGeneral: !!prompts.general,
generalLength: prompts.general?.length || 0,
generalPreview: prompts.general?.substring(0, 100) || '(empty)',
hasCompanySpecific: !!prompts.companySpecific,
companyKey
});
// Validate required prompts exist
validateRequiredPrompts(prompts, 'name_validation');
// Build the user prompt with database-loaded prompts
const userPrompt = buildNameUserPrompt(product, prompts);
// Debug: Log the full user prompt being sent
log.info('[NameValidation] User prompt:', userPrompt.substring(0, 500));
let response;
let result;
@@ -87,18 +101,27 @@ function createNameValidationTask() {
{ role: 'system', content: prompts.system },
{ role: 'user', content: userPrompt }
],
model: MODELS.SMALL, // openai/gpt-oss-20b - fast for simple tasks
model: MODELS.SMALL, // openai/gpt-oss-20b - reasoning model
temperature: 0.2, // Low temperature for consistent results
maxTokens: 300,
maxTokens: 1500, // Reasoning models need extra tokens for thinking
responseFormat: { type: 'json_object' }
});
// Log full raw response for debugging
log.info('[NameValidation] Raw AI response:', {
parsed: response.parsed,
content: response.content,
contentLength: response.content?.length
});
// Parse the response
result = parseNameResponse(response.parsed, response.content);
} catch (jsonError) {
// If JSON mode failed, check if we have failedGeneration to parse
if (jsonError.failedGeneration) {
log.warn('[NameValidation] JSON mode failed, attempting to parse failed_generation');
log.warn('[NameValidation] JSON mode failed, attempting to parse failed_generation:', {
failedGeneration: jsonError.failedGeneration
});
result = parseNameResponse(null, jsonError.failedGeneration);
response = { latencyMs: 0, usage: {}, model: MODELS.SMALL };
} else {
@@ -111,9 +134,14 @@ function createNameValidationTask() {
],
model: MODELS.SMALL,
temperature: 0.2,
maxTokens: 300
maxTokens: 1500 // Reasoning models need extra tokens for thinking
// No responseFormat - let the model respond freely
});
log.info('[NameValidation] Raw AI response (no JSON mode):', {
parsed: response.parsed,
content: response.content,
contentLength: response.content?.length
});
result = parseNameResponse(response.parsed, response.content);
}
}