/**
 * AI Service
 *
 * Main entry point for AI functionality including:
 * - Embeddings for taxonomy suggestions (OpenAI)
 * - Chat completions for validation tasks (Groq)
 * - Task registry for AI operations
 */

// Provider clients: OpenAI for embeddings, Groq for chat completions.
const { OpenAIProvider } = require('./providers/openaiProvider');
const { GroqProvider, MODELS: GROQ_MODELS } = require('./providers/groqProvider');
// Taxonomy embedding store and vector-similarity helpers (re-exported below).
const { TaxonomyEmbeddings } = require('./embeddings/taxonomyEmbeddings');
const { cosineSimilarity, findTopMatches } = require('./embeddings/similarity');
// Task registry: task IDs, registry lookup, and bulk task registration.
const { getRegistry, TASK_IDS, registerAllTasks } = require('./tasks');

// Module-level singleton state for the AI service (one instance per process).
let initialized = false;       // true once initialize() has completed successfully
let initializing = false;      // guards against concurrent initialize() calls
let openaiProvider = null;     // OpenAIProvider instance (embeddings), set in initialize()
let groqProvider = null;       // GroqProvider instance (chat); stays null without a Groq API key
let taxonomyEmbeddings = null; // TaxonomyEmbeddings instance built during initialize()
let logger = console;          // replaced by a custom logger when one is passed to initialize()

// Store pool reference for task access
let appPool = null;

/**
 * Initialize the AI service. Idempotent: a second call after success is a
 * no-op, and concurrent callers wait for the in-flight initialization.
 * @param {Object} options
 * @param {string} options.openaiApiKey - OpenAI API key (for embeddings); required
 * @param {string} [options.groqApiKey] - Groq API key (for chat completions)
 * @param {Object} options.mysqlConnection - MySQL connection for taxonomy data
 * @param {Object} [options.pool] - PostgreSQL pool for prompt loading
 * @param {Object} [options.logger] - Logger instance
 * @returns {Promise<{success: boolean, message: string, stats?: Object, groqEnabled?: boolean}>}
 *   Failures are reported via `success: false` rather than thrown, so callers
 *   can degrade gracefully.
 */
async function initialize({ openaiApiKey, groqApiKey, mysqlConnection, pool, logger: customLogger }) {
  // Fast path: already fully initialized.
  if (initialized) {
    return { success: true, message: 'Already initialized' };
  }

  if (initializing) {
    // Wait for existing initialization: poll until the in-flight call clears
    // the flag in its `finally` block, then report that call's outcome.
    while (initializing) {
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    return { success: initialized, message: initialized ? 'Initialized' : 'Initialization failed' };
  }

  initializing = true;

  try {
    if (customLogger) {
      logger = customLogger;
    }

    // Embeddings are the core capability, so the OpenAI key is mandatory.
    if (!openaiApiKey) {
      throw new Error('OpenAI API key is required');
    }

    logger.info('[AI] Initializing AI service...');

    // Store pool reference for tasks
    if (pool) {
      appPool = pool;
    }

    // Create OpenAI provider (for embeddings)
    openaiProvider = new OpenAIProvider({ apiKey: openaiApiKey });

    // Create Groq provider (for chat completions) if API key provided;
    // without it, chat-based tasks are simply unavailable (see runTask()).
    if (groqApiKey) {
      groqProvider = new GroqProvider({ apiKey: groqApiKey });
      logger.info('[AI] Groq provider initialized for chat completions');
    } else {
      logger.warn('[AI] No Groq API key provided - chat completion tasks will not be available');
    }

    // Create and initialize taxonomy embeddings
    taxonomyEmbeddings = new TaxonomyEmbeddings({
      provider: openaiProvider,
      logger
    });

    const stats = await taxonomyEmbeddings.initialize(mysqlConnection);

    // Register validation tasks if Groq is available
    if (groqProvider) {
      registerValidationTasks();
    }

    initialized = true;
    logger.info('[AI] AI service initialized', {
      ...stats,
      groqEnabled: !!groqProvider,
      tasksRegistered: getRegistry().list()
    });

    return {
      success: true,
      message: 'Initialized',
      stats,
      groqEnabled: !!groqProvider
    };
  } catch (error) {
    // Report the failure instead of rethrowing so the app can start degraded.
    logger.error('[AI] Initialization failed:', error);
    return { success: false, message: error.message };
  } finally {
    // Always clear the guard so waiting callers (and retries) can proceed.
    initializing = false;
  }
}

/**
 * Register validation tasks with the task registry.
 * Called during initialization if Groq is available; delegates the actual
 * registration to registerAllTasks() from ./tasks.
 */
function registerValidationTasks() {
  registerAllTasks(logger);
  logger.info('[AI] Validation tasks registered');
}

/**
 * Check if the service is ready to serve requests.
 * @returns {boolean} true only when initialization completed AND the
 *   taxonomy embeddings report themselves ready.
 */
function isReady() {
  // Coerce to a strict boolean: without Boolean(), a null taxonomyEmbeddings
  // makes the expression evaluate to `undefined` instead of `false`.
  return Boolean(initialized && taxonomyEmbeddings?.isReady());
}

/**
 * Build weighted product text for embedding.
 * The product name is weighted heavily by repeating it three times, and long
 * descriptions are truncated so verbose marketing copy cannot drown out the
 * product signal.
 *
 * @param {Object} product - Product with name, description, company, line
 * @returns {string} - Combined text for embedding ('' when nothing usable)
 */
function buildProductText(product) {
  const clean = (value) => value?.trim();

  const name = clean(product.name);
  const description = clean(product.description);
  const company = clean(product.company_name || product.company);
  const line = clean(product.line_name || product.line);

  const MAX_DESCRIPTION_LENGTH = 500;
  const segments = [];

  // Name is most important - repeat 3x to weight it heavily in the embedding.
  if (name) {
    segments.push(name, name, name);
  }

  // Company and line provide context.
  if (company) {
    segments.push(company);
  }
  if (line) {
    segments.push(line);
  }

  // Truncate description to prevent it from overwhelming the signal.
  if (description) {
    const fits = description.length <= MAX_DESCRIPTION_LENGTH;
    segments.push(fits ? description : `${description.slice(0, MAX_DESCRIPTION_LENGTH)}...`);
  }

  return segments.join(' ').trim();
}
/**
 * Generate an embedding vector for a single product.
 * @param {Object} product - Product with name, description, company, line
 * @returns {Promise<{embedding: number[]|null, latencyMs: number}>}
 *   `embedding` is null when the product yields no embeddable text.
 * @throws {Error} if the service has not been initialized
 */
async function getProductEmbedding(product) {
  if (!initialized || !openaiProvider) {
    throw new Error('AI service not initialized');
  }

  const text = buildProductText(product);
  if (!text) {
    // Nothing usable to embed — skip the API call entirely.
    return { embedding: null, latencyMs: 0 };
  }

  const { embeddings, latencyMs } = await openaiProvider.embed(text);
  return { embedding: embeddings[0], latencyMs };
}

/**
 * Generate embeddings for multiple products in one batched API call.
 * Products that produce no embeddable text are skipped; results carry the
 * original index of each product so callers can realign them.
 * @param {Object[]} products - Array of products
 * @returns {Promise<{embeddings: Array<{index: number, embedding: number[]}>, latencyMs: number}>}
 * @throws {Error} if the service has not been initialized
 */
async function getProductEmbeddings(products) {
  if (!initialized || !openaiProvider) {
    throw new Error('AI service not initialized');
  }

  const texts = products.map(buildProductText);

  // Remember the positions of products whose text is non-empty.
  const validIndices = [];
  for (let i = 0; i < texts.length; i += 1) {
    if (texts[i]) {
      validIndices.push(i);
    }
  }

  if (validIndices.length === 0) {
    return { embeddings: [], latencyMs: 0 };
  }

  const result = await openaiProvider.embed(validIndices.map((i) => texts[i]));

  // Pair each returned embedding with the original product index.
  const embeddings = validIndices.map((originalIndex, k) => ({
    index: originalIndex,
    embedding: result.embeddings[k]
  }));

  return { embeddings, latencyMs: result.latencyMs };
}

/**
 * Find similar taxonomy items for a product embedding.
 * @param {number[]} productEmbedding
 * @param {Object} options
 * @param {number} [options.topCategories=10] - max category matches
 * @param {number} [options.topThemes=5] - max theme matches
 * @param {number} [options.topColors=5] - max color matches
 * @returns {{categories: Array, themes: Array, colors: Array}}
 * @throws {Error} if the service has not been initialized
 */
function findSimilarTaxonomy(productEmbedding, options = {}) {
  if (!initialized || !taxonomyEmbeddings) {
    throw new Error('AI service not initialized');
  }

  // `??` (not `||`) so an explicit 0 from the caller is respected.
  return {
    categories: taxonomyEmbeddings.findSimilarCategories(
      productEmbedding,
      options.topCategories ?? 10
    ),
    themes: taxonomyEmbeddings.findSimilarThemes(productEmbedding, options.topThemes ?? 5),
    colors: taxonomyEmbeddings.findSimilarColors(productEmbedding, options.topColors ?? 5)
  };
}

/**
 * Get a product's embedding and find similar taxonomy items in one call.
 * @param {Object} product - Product with name, description, company, line
 * @param {Object} [options] - forwarded to findSimilarTaxonomy (top-N limits)
 * @returns {Promise<Object>} categories/themes/colors plus timing breakdown
 *   (latencyMs = embedding + search; also embeddingLatencyMs, searchLatencyMs)
 */
async function getSuggestionsForProduct(product, options = {}) {
  const embeddingResult = await getProductEmbedding(product);
  const embeddingLatency = embeddingResult.latencyMs;

  // A product with no embeddable text yields empty suggestions.
  if (!embeddingResult.embedding) {
    return {
      categories: [],
      themes: [],
      colors: [],
      latencyMs: embeddingLatency
    };
  }

  const searchStart = Date.now();
  const matches = findSimilarTaxonomy(embeddingResult.embedding, options);
  const searchLatency = Date.now() - searchStart;

  return {
    ...matches,
    latencyMs: embeddingLatency + searchLatency,
    embeddingLatencyMs: embeddingLatency,
    searchLatencyMs: searchLatency
  };
}

/**
 * Get all taxonomy data (without embeddings) for the frontend.
 * @returns {Object} whatever TaxonomyEmbeddings.getTaxonomyData() provides
 *   (presumably categories/themes/colors — shape owned by that class)
 * @throws {Error} if the service has not been initialized
 */
function getTaxonomyData() {
  if (!initialized || !taxonomyEmbeddings) {
    throw new Error('AI service not initialized');
  }

  return taxonomyEmbeddings.getTaxonomyData();
}

/**
 * Get a snapshot of the service's status for diagnostics.
 * Safe to call before initialization — never throws.
 * @returns {Object} flags, taxonomy counts (or null), and registered tasks
 */
function getStatus() {
  const registry = getRegistry();

  // Taxonomy counts are only meaningful once the embeddings object exists.
  const taxonomyStats = taxonomyEmbeddings
    ? {
        categories: taxonomyEmbeddings.categories?.length || 0,
        themes: taxonomyEmbeddings.themes?.length || 0,
        colors: taxonomyEmbeddings.colors?.length || 0
      }
    : null;

  return {
    initialized,
    ready: isReady(),
    hasOpenAI: !!openaiProvider,
    hasGroq: !!groqProvider,
    hasTaxonomy: !!taxonomyEmbeddings,
    taxonomyStats,
    tasks: {
      registered: registry.list(),
      count: registry.size()
    }
  };
}

/**
 * Run an AI task by ID via the task registry.
 * @param {string} taskId - Task identifier from TASK_IDS
 * @param {Object} [payload] - Task-specific input
 * @returns {Promise<Object>} Task result
 * @throws {Error} if the service is uninitialized or Groq is unavailable
 */
async function runTask(taskId, payload = {}) {
  if (!initialized) {
    throw new Error('AI service not initialized');
  }

  // Chat-completion tasks all go through Groq; without it they cannot run.
  if (!groqProvider) {
    throw new Error('Groq provider not available - chat completion tasks require GROQ_API_KEY');
  }

  // Merge the caller's payload with injected dependencies the tasks may need.
  // Use pool from payload if provided (from route), fall back to stored appPool.
  const taskInput = {
    ...payload,
    provider: groqProvider,
    pool: payload.pool || appPool,
    logger
  };

  return getRegistry().runTask(taskId, taskInput);
}

/**
 * Get the Groq provider instance (for direct use if needed).
 * Null when initialize() was called without a Groq API key.
 * @returns {GroqProvider|null}
 */
function getGroqProvider() {
  return groqProvider;
}

/**
 * Get the PostgreSQL pool (for tasks that need DB access).
 * Null unless a pool was supplied to initialize().
 * @returns {Object|null}
 */
function getPool() {
  return appPool;
}

/**
 * Check if chat completion tasks are available.
 * True only when a Groq provider was created during initialize().
 * @returns {boolean}
 */
function hasChatCompletion() {
  return !!groqProvider;
}

// Public API of the AI service module.
module.exports = {
  // Initialization
  initialize,
  isReady,
  getStatus,

  // Embeddings (OpenAI)
  getProductEmbedding,
  getProductEmbeddings,
  findSimilarTaxonomy,
  getSuggestionsForProduct,
  getTaxonomyData,

  // Chat completions (Groq)
  runTask,
  hasChatCompletion,
  getGroqProvider,
  getPool,

  // Constants
  TASK_IDS,
  GROQ_MODELS,

  // Re-export utilities
  cosineSimilarity,
  findTopMatches
};