/** * AI Service * * Main entry point for AI functionality including: * - Embeddings for taxonomy suggestions (OpenAI) * - Chat completions for validation tasks (Groq) * - Task registry for AI operations */ const { OpenAIProvider } = require('./providers/openaiProvider'); const { GroqProvider, MODELS: GROQ_MODELS } = require('./providers/groqProvider'); const { TaxonomyEmbeddings } = require('./embeddings/taxonomyEmbeddings'); const { cosineSimilarity, findTopMatches } = require('./embeddings/similarity'); const { getRegistry, TASK_IDS, registerAllTasks } = require('./tasks'); let initialized = false; let initializing = false; let openaiProvider = null; let groqProvider = null; let taxonomyEmbeddings = null; let logger = console; // Store pool reference for task access let appPool = null; /** * Initialize the AI service * @param {Object} options * @param {string} options.openaiApiKey - OpenAI API key (for embeddings) * @param {string} [options.groqApiKey] - Groq API key (for chat completions) * @param {Object} options.mysqlConnection - MySQL connection for taxonomy data * @param {Object} [options.pool] - PostgreSQL pool for prompt loading * @param {Object} [options.logger] - Logger instance */ async function initialize({ openaiApiKey, groqApiKey, mysqlConnection, pool, logger: customLogger }) { if (initialized) { return { success: true, message: 'Already initialized' }; } if (initializing) { // Wait for existing initialization while (initializing) { await new Promise(resolve => setTimeout(resolve, 100)); } return { success: initialized, message: initialized ? 'Initialized' : 'Initialization failed' }; } initializing = true; try { if (customLogger) { logger = customLogger; } if (!openaiApiKey) { throw new Error('OpenAI API key is required'); } logger.info('[AI] Initializing AI service...'); // Store pool reference for tasks if (pool) { appPool = pool; } // Create OpenAI provider (for embeddings) openaiProvider = new OpenAIProvider({ apiKey: openaiApiKey }); // Create Groq provider (for chat completions) if API key provided if (groqApiKey) { groqProvider = new GroqProvider({ apiKey: groqApiKey }); logger.info('[AI] Groq provider initialized for chat completions'); } else { logger.warn('[AI] No Groq API key provided - chat completion tasks will not be available'); } // Create and initialize taxonomy embeddings taxonomyEmbeddings = new TaxonomyEmbeddings({ provider: openaiProvider, logger }); const stats = await taxonomyEmbeddings.initialize(mysqlConnection); // Register validation tasks if Groq is available if (groqProvider) { registerValidationTasks(); } initialized = true; logger.info('[AI] AI service initialized', { ...stats, groqEnabled: !!groqProvider, tasksRegistered: getRegistry().list() }); return { success: true, message: 'Initialized', stats, groqEnabled: !!groqProvider }; } catch (error) { logger.error('[AI] Initialization failed:', error); return { success: false, message: error.message }; } finally { initializing = false; } } /** * Register validation tasks with the task registry * Called during initialization if Groq is available */ function registerValidationTasks() { registerAllTasks(logger); logger.info('[AI] Validation tasks registered'); } /** * Check if service is ready */ function isReady() { return initialized && taxonomyEmbeddings?.isReady(); } /** * Build weighted product text for embedding. * Weights the product name heavily by repeating it, and truncates long descriptions * to prevent verbose marketing copy from drowning out the product signal. * * @param {Object} product - Product with name, description, company, line * @returns {string} - Combined text for embedding */ function buildProductText(product) { const parts = []; const name = product.name?.trim(); const description = product.description?.trim(); const company = (product.company_name || product.company)?.trim(); const line = (product.line_name || product.line)?.trim(); // Name is most important - repeat 3x to weight it heavily in the embedding if (name) { parts.push(name, name, name); } // Company and line provide context if (company) { parts.push(company); } if (line) { parts.push(line); } // Truncate description to prevent it from overwhelming the signal if (description) { const truncated = description.length > 500 ? description.substring(0, 500) + '...' : description; parts.push(truncated); } return parts.join(' ').trim(); } /** * Generate embedding for a product * @param {Object} product - Product with name, description, company, line * @returns {Promise<{embedding: number[], latencyMs: number}>} */ async function getProductEmbedding(product) { if (!initialized || !openaiProvider) { throw new Error('AI service not initialized'); } const text = buildProductText(product); if (!text) { return { embedding: null, latencyMs: 0 }; } const result = await openaiProvider.embed(text); return { embedding: result.embeddings[0], latencyMs: result.latencyMs }; } /** * Generate embeddings for multiple products * @param {Object[]} products - Array of products * @returns {Promise<{embeddings: Array<{index: number, embedding: number[]}>, latencyMs: number}>} */ async function getProductEmbeddings(products) { if (!initialized || !openaiProvider) { throw new Error('AI service not initialized'); } const texts = products.map(buildProductText); // Track which products have empty text const validIndices = texts.map((t, i) => t ? i : -1).filter(i => i >= 0); const validTexts = texts.filter(t => t); if (validTexts.length === 0) { return { embeddings: [], latencyMs: 0 }; } const result = await openaiProvider.embed(validTexts); // Map embeddings back to original indices const embeddings = validIndices.map((originalIndex, resultIndex) => ({ index: originalIndex, embedding: result.embeddings[resultIndex] })); return { embeddings, latencyMs: result.latencyMs }; } /** * Find similar taxonomy items for a product embedding * @param {number[]} productEmbedding * @param {Object} options * @returns {{categories: Array, themes: Array, colors: Array}} */ function findSimilarTaxonomy(productEmbedding, options = {}) { if (!initialized || !taxonomyEmbeddings) { throw new Error('AI service not initialized'); } const topCategories = options.topCategories ?? 10; const topThemes = options.topThemes ?? 5; const topColors = options.topColors ?? 5; return { categories: taxonomyEmbeddings.findSimilarCategories(productEmbedding, topCategories), themes: taxonomyEmbeddings.findSimilarThemes(productEmbedding, topThemes), colors: taxonomyEmbeddings.findSimilarColors(productEmbedding, topColors) }; } /** * Get product embedding and find similar taxonomy in one call * @param {Object} product * @param {Object} options */ async function getSuggestionsForProduct(product, options = {}) { const { embedding, latencyMs: embeddingLatency } = await getProductEmbedding(product); if (!embedding) { return { categories: [], themes: [], colors: [], latencyMs: embeddingLatency }; } const startSearch = Date.now(); const suggestions = findSimilarTaxonomy(embedding, options); const searchLatency = Date.now() - startSearch; return { ...suggestions, latencyMs: embeddingLatency + searchLatency, embeddingLatencyMs: embeddingLatency, searchLatencyMs: searchLatency }; } /** * Get all taxonomy data (without embeddings) for frontend */ function getTaxonomyData() { if (!initialized || !taxonomyEmbeddings) { throw new Error('AI service not initialized'); } return taxonomyEmbeddings.getTaxonomyData(); } /** * Get service status */ function getStatus() { const registry = getRegistry(); return { initialized, ready: isReady(), hasOpenAI: !!openaiProvider, hasGroq: !!groqProvider, hasTaxonomy: !!taxonomyEmbeddings, taxonomyStats: taxonomyEmbeddings ? { categories: taxonomyEmbeddings.categories?.length || 0, themes: taxonomyEmbeddings.themes?.length || 0, colors: taxonomyEmbeddings.colors?.length || 0 } : null, tasks: { registered: registry.list(), count: registry.size() } }; } /** * Run an AI task by ID * @param {string} taskId - Task identifier from TASK_IDS * @param {Object} payload - Task-specific input * @returns {Promise} Task result */ async function runTask(taskId, payload = {}) { if (!initialized) { throw new Error('AI service not initialized'); } if (!groqProvider) { throw new Error('Groq provider not available - chat completion tasks require GROQ_API_KEY'); } const registry = getRegistry(); return registry.runTask(taskId, { ...payload, // Inject dependencies tasks may need provider: groqProvider, // Use pool from payload if provided (from route), fall back to stored appPool pool: payload.pool || appPool, logger }); } /** * Get the Groq provider instance (for direct use if needed) * @returns {GroqProvider|null} */ function getGroqProvider() { return groqProvider; } /** * Get the PostgreSQL pool (for tasks that need DB access) * @returns {Object|null} */ function getPool() { return appPool; } /** * Check if chat completion tasks are available * @returns {boolean} */ function hasChatCompletion() { return !!groqProvider; } module.exports = { // Initialization initialize, isReady, getStatus, // Embeddings (OpenAI) getProductEmbedding, getProductEmbeddings, findSimilarTaxonomy, getSuggestionsForProduct, getTaxonomyData, // Chat completions (Groq) runTask, hasChatCompletion, getGroqProvider, getPool, // Constants TASK_IDS, GROQ_MODELS, // Re-export utilities cosineSimilarity, findTopMatches };