Optimize orders import
This commit is contained in:
@@ -1,386 +0,0 @@
|
||||
/**
 * AI Service
 *
 * Main entry point for AI functionality including:
 * - Embeddings for taxonomy suggestions (OpenAI)
 * - Chat completions for validation tasks (Groq)
 * - Task registry for AI operations
 */
|
||||
|
||||
const { OpenAIProvider } = require('./providers/openaiProvider');
|
||||
const { GroqProvider, MODELS: GROQ_MODELS } = require('./providers/groqProvider');
|
||||
const { TaxonomyEmbeddings } = require('./embeddings/taxonomyEmbeddings');
|
||||
const { cosineSimilarity, findTopMatches } = require('./embeddings/similarity');
|
||||
const { getRegistry, TASK_IDS, registerAllTasks } = require('./tasks');
|
||||
|
||||
// Lifecycle flags: `initialized` flips to true once initialize() succeeds;
// `initializing` is true while an initialize() call is in flight.
let initialized = false;
let initializing = false;

// Provider / embedding instances; all stay null until initialize() creates them.
let openaiProvider = null;
let groqProvider = null;
let taxonomyEmbeddings = null;

// Defaults to console; initialize() replaces it when a custom logger is passed.
let logger = console;

// Store pool reference for task access
let appPool = null;
|
||||
|
||||
/**
 * Initialize the AI service.
 *
 * Never rejects for initialization problems: failures are logged and reported
 * through the returned `success` / `message` fields.
 *
 * @param {Object} [options]
 * @param {string} options.openaiApiKey - OpenAI API key (for embeddings); required
 * @param {string} [options.groqApiKey] - Groq API key (for chat completions)
 * @param {Object} options.mysqlConnection - MySQL connection for taxonomy data
 * @param {Object} [options.pool] - PostgreSQL pool for prompt loading
 * @param {Object} [options.logger] - Logger instance
 * @returns {Promise<{success: boolean, message: string, stats?: Object, groqEnabled?: boolean}>}
 */
async function initialize({ openaiApiKey, groqApiKey, mysqlConnection, pool, logger: customLogger } = {}) {
  if (initialized) {
    return { success: true, message: 'Already initialized' };
  }

  if (initializing) {
    // Another call is already initializing: poll until it settles, then
    // report that call's outcome instead of starting a second run.
    while (initializing) {
      await new Promise((resolve) => setTimeout(resolve, 100));
    }
    return { success: initialized, message: initialized ? 'Initialized' : 'Initialization failed' };
  }

  initializing = true;

  try {
    if (customLogger) {
      logger = customLogger;
    }

    if (!openaiApiKey) {
      throw new Error('OpenAI API key is required');
    }

    logger.info('[AI] Initializing AI service...');

    // Store pool reference for tasks
    if (pool) {
      appPool = pool;
    }

    // Create OpenAI provider (for embeddings)
    openaiProvider = new OpenAIProvider({ apiKey: openaiApiKey });

    // Create Groq provider (for chat completions) if API key provided
    if (groqApiKey) {
      groqProvider = new GroqProvider({ apiKey: groqApiKey });
      logger.info('[AI] Groq provider initialized for chat completions');
    } else {
      logger.warn('[AI] No Groq API key provided - chat completion tasks will not be available');
    }

    // Create and initialize taxonomy embeddings
    taxonomyEmbeddings = new TaxonomyEmbeddings({
      provider: openaiProvider,
      logger,
    });

    const stats = await taxonomyEmbeddings.initialize(mysqlConnection);

    // Register validation tasks if Groq is available
    if (groqProvider) {
      registerValidationTasks();
    }

    initialized = true;
    logger.info('[AI] AI service initialized', {
      ...stats,
      groqEnabled: !!groqProvider,
      tasksRegistered: getRegistry().list(),
    });

    return {
      success: true,
      message: 'Initialized',
      stats,
      groqEnabled: !!groqProvider,
    };
  } catch (error) {
    // Roll back everything this attempt created so the module never reports
    // half-built providers (e.g. hasChatCompletion() returning true, or a
    // stale Groq provider surviving into a later retry without a Groq key).
    initialized = false;
    openaiProvider = null;
    groqProvider = null;
    taxonomyEmbeddings = null;

    logger.error('[AI] Initialization failed:', error);
    return { success: false, message: error.message };
  } finally {
    // Always release the in-flight flag so waiters in the polling loop wake up.
    initializing = false;
  }
}
|
||||
|
||||
/**
 * Register validation tasks with the task registry.
 * Called during initialization if Groq is available.
 *
 * Passes the module's current logger to registerAllTasks and logs once done.
 */
function registerValidationTasks() {
  registerAllTasks(logger);
  logger.info('[AI] Validation tasks registered');
}
|
||||
|
||||
/**
 * Check if service is ready.
 *
 * Ready means initialize() has completed and the taxonomy embeddings report
 * themselves ready.
 *
 * @returns {boolean} Always a strict boolean, never undefined
 */
function isReady() {
  // Boolean() guards against leaking `undefined` (missing taxonomyEmbeddings)
  // or a non-boolean truthy value from taxonomyEmbeddings.isReady().
  return Boolean(initialized && taxonomyEmbeddings?.isReady());
}
|
||||
|
||||
/**
 * Build weighted product text for embedding.
 * Weights the product name heavily by repeating it, and truncates long descriptions
 * to prevent verbose marketing copy from drowning out the product signal.
 *
 * Fields are read defensively: a missing product yields an empty string, finite
 * numbers are converted to text, and any other non-string value is ignored.
 *
 * @param {Object} product - Product with name, description, company, line
 *   (`company_name` / `line_name` take precedence over `company` / `line`)
 * @returns {string} - Combined text for embedding ('' when there is nothing usable)
 */
function buildProductText(product) {
  const MAX_DESCRIPTION_LENGTH = 500;
  const NAME_REPEAT = 3;

  // Normalize one field to trimmed text without throwing on non-string values.
  const toText = (value) => {
    if (typeof value === 'string') return value.trim();
    if (typeof value === 'number' && Number.isFinite(value)) return String(value);
    return '';
  };

  const name = toText(product?.name);
  const description = toText(product?.description);
  // Normalize before falling back so a blank `*_name` does not hide the alternate field.
  const company = toText(product?.company_name) || toText(product?.company);
  const line = toText(product?.line_name) || toText(product?.line);

  const parts = [];

  // Name is most important - repeat 3x to weight it heavily in the embedding
  if (name) {
    for (let i = 0; i < NAME_REPEAT; i += 1) {
      parts.push(name);
    }
  }

  // Company and line provide context
  if (company) {
    parts.push(company);
  }
  if (line) {
    parts.push(line);
  }

  // Truncate description to prevent it from overwhelming the signal
  if (description) {
    if (description.length > MAX_DESCRIPTION_LENGTH) {
      let end = MAX_DESCRIPTION_LENGTH;
      const lastUnit = description.charCodeAt(end - 1);
      // Back off one unit if the cut would leave a lone high surrogate
      // (half of an emoji or other astral character) at the end.
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
      }
      parts.push(`${description.slice(0, end)}...`);
    } else {
      parts.push(description);
    }
  }

  return parts.join(' ').trim();
}
|
||||
|
||||
/**
 * Generate embedding for a product.
 *
 * @param {Object} product - Product with name, description, company, line
 * @returns {Promise<{embedding: number[]|null, latencyMs: number}>}
 *   `embedding` is null when the product yields no text to embed (the provider
 *   is not called in that case) or when the provider returns no vector.
 * @throws {Error} If the AI service has not been initialized
 */
async function getProductEmbedding(product) {
  if (!initialized || !openaiProvider) {
    throw new Error('AI service not initialized');
  }

  const text = buildProductText(product);

  // Nothing to embed: skip the provider call entirely.
  if (!text) {
    return { embedding: null, latencyMs: 0 };
  }

  const { embeddings, latencyMs } = await openaiProvider.embed(text);

  return {
    // Normalize "no vector" to null so callers have a single value to check.
    embedding: embeddings[0] ?? null,
    latencyMs,
  };
}
|
||||
|
||||
/**
 * Generate embeddings for multiple products.
 *
 * Products that produce no text are skipped; each returned entry carries the
 * index of its product in the input so results can be matched back up.
 *
 * @param {Object[]} products - Array of products (null/undefined is treated as empty)
 * @returns {Promise<{embeddings: Array<{index: number, embedding: number[]}>, latencyMs: number}>}
 * @throws {Error} If the AI service has not been initialized
 */
async function getProductEmbeddings(products) {
  if (!initialized || !openaiProvider) {
    throw new Error('AI service not initialized');
  }

  // Single pass: collect the non-empty texts together with their original positions.
  const validIndices = [];
  const validTexts = [];
  Array.from(products ?? []).forEach((product, index) => {
    const text = buildProductText(product);
    if (text) {
      validIndices.push(index);
      validTexts.push(text);
    }
  });

  // Nothing to embed: skip the provider call entirely.
  if (validTexts.length === 0) {
    return { embeddings: [], latencyMs: 0 };
  }

  const result = await openaiProvider.embed(validTexts);

  // Map embeddings back to original indices
  const embeddings = validIndices.map((originalIndex, resultIndex) => ({
    index: originalIndex,
    embedding: result.embeddings[resultIndex],
  }));

  return {
    embeddings,
    latencyMs: result.latencyMs,
  };
}
|
||||
|
||||
/**
 * Find similar taxonomy items for a product embedding.
 *
 * @param {number[]} productEmbedding - Embedding vector; a null/undefined or
 *   empty vector yields empty result lists without running a search
 * @param {Object} [options]
 * @param {number} [options.topCategories=10] - Max categories to return
 * @param {number} [options.topThemes=5] - Max themes to return
 * @param {number} [options.topColors=5] - Max colors to return
 * @returns {{categories: Array, themes: Array, colors: Array}}
 * @throws {Error} If the AI service has not been initialized
 */
function findSimilarTaxonomy(productEmbedding, options = {}) {
  if (!initialized || !taxonomyEmbeddings) {
    throw new Error('AI service not initialized');
  }

  // getProductEmbedding() reports "nothing to embed" as a null embedding;
  // answer that with empty suggestions instead of searching with no vector.
  if (productEmbedding == null || productEmbedding.length === 0) {
    return { categories: [], themes: [], colors: [] };
  }

  // `?? {}` covers an explicit null, which the parameter default does not.
  const limits = options ?? {};
  const topCategories = limits.topCategories ?? 10;
  const topThemes = limits.topThemes ?? 5;
  const topColors = limits.topColors ?? 5;

  return {
    categories: taxonomyEmbeddings.findSimilarCategories(productEmbedding, topCategories),
    themes: taxonomyEmbeddings.findSimilarThemes(productEmbedding, topThemes),
    colors: taxonomyEmbeddings.findSimilarColors(productEmbedding, topColors),
  };
}
|
||||
|
||||
/**
 * Get product embedding and find similar taxonomy in one call.
 *
 * @param {Object} product - Product passed to getProductEmbedding()
 * @param {Object} [options] - Passed through to findSimilarTaxonomy()
 * @returns {Promise<{categories: Array, themes: Array, colors: Array,
 *   latencyMs: number, embeddingLatencyMs: number, searchLatencyMs: number}>}
 *   The lists are empty when no embedding could be produced for the product.
 */
async function getSuggestionsForProduct(product, options = {}) {
  const { embedding, latencyMs: embeddingLatency } = await getProductEmbedding(product);

  if (!embedding) {
    // Keep the same result shape as the success path so callers can read the
    // latency breakdown unconditionally.
    return {
      categories: [],
      themes: [],
      colors: [],
      latencyMs: embeddingLatency,
      embeddingLatencyMs: embeddingLatency,
      searchLatencyMs: 0,
    };
  }

  const startSearch = Date.now();
  const suggestions = findSimilarTaxonomy(embedding, options);
  const searchLatency = Date.now() - startSearch;

  return {
    ...suggestions,
    latencyMs: embeddingLatency + searchLatency,
    embeddingLatencyMs: embeddingLatency,
    searchLatencyMs: searchLatency,
  };
}
|
||||
|
||||
/**
 * Get all taxonomy data (without embeddings) for frontend.
 *
 * @returns {*} Whatever taxonomyEmbeddings.getTaxonomyData() returns
 * @throws {Error} If the AI service has not been initialized
 */
function getTaxonomyData() {
  if (!initialized || !taxonomyEmbeddings) {
    throw new Error('AI service not initialized');
  }

  return taxonomyEmbeddings.getTaxonomyData();
}
|
||||
|
||||
/**
 * Get service status.
 *
 * Safe to call before initialization: reports which pieces exist rather than
 * throwing when they are missing.
 *
 * @returns {{initialized: boolean, ready: *, hasOpenAI: boolean, hasGroq: boolean,
 *   hasTaxonomy: boolean,
 *   taxonomyStats: ({categories: number, themes: number, colors: number}|null),
 *   tasks: {registered: *, count: *}}}
 */
function getStatus() {
  const registry = getRegistry();

  // Counts default to 0 when a taxonomy list has not been populated.
  const taxonomyStats = taxonomyEmbeddings
    ? {
        categories: taxonomyEmbeddings.categories?.length ?? 0,
        themes: taxonomyEmbeddings.themes?.length ?? 0,
        colors: taxonomyEmbeddings.colors?.length ?? 0,
      }
    : null;

  return {
    initialized,
    ready: isReady(),
    hasOpenAI: !!openaiProvider,
    hasGroq: !!groqProvider,
    hasTaxonomy: !!taxonomyEmbeddings,
    taxonomyStats,
    tasks: {
      registered: registry.list(),
      count: registry.size(),
    },
  };
}
|
||||
|
||||
/**
 * Run an AI task by ID.
 *
 * The task receives the payload plus injected dependencies. `provider` and
 * `logger` always come from this module (overriding same-named payload keys);
 * `pool` comes from the payload when present, otherwise from the stored pool.
 *
 * @param {string} taskId - Task identifier from TASK_IDS
 * @param {Object} [payload] - Task-specific input (null is treated as empty)
 * @returns {Promise<Object>} Task result
 * @throws {Error} If the service is not initialized or no Groq provider exists
 */
async function runTask(taskId, payload = {}) {
  if (!initialized) {
    throw new Error('AI service not initialized');
  }

  if (!groqProvider) {
    throw new Error('Groq provider not available - chat completion tasks require GROQ_API_KEY');
  }

  // The parameter default only covers `undefined`; tolerate an explicit null too.
  const input = payload ?? {};

  const registry = getRegistry();
  return registry.runTask(taskId, {
    ...input,
    // Inject dependencies tasks may need
    provider: groqProvider,
    // Use pool from payload if provided (from route), fall back to stored appPool
    pool: input.pool || appPool,
    logger,
  });
}
|
||||
|
||||
/**
 * Get the Groq provider instance (for direct use if needed).
 *
 * @returns {GroqProvider|null} null until initialize() has created one
 */
function getGroqProvider() {
  return groqProvider;
}
|
||||
|
||||
/**
 * Get the PostgreSQL pool (for tasks that need DB access).
 *
 * @returns {Object|null} null when no pool has been stored by initialize()
 */
function getPool() {
  return appPool;
}
|
||||
|
||||
/**
 * Check if chat completion tasks are available.
 *
 * @returns {boolean} true when a Groq provider instance exists
 */
function hasChatCompletion() {
  return !!groqProvider;
}
|
||||
|
||||
module.exports = {
|
||||
// Initialization
|
||||
initialize,
|
||||
isReady,
|
||||
getStatus,
|
||||
|
||||
// Embeddings (OpenAI)
|
||||
getProductEmbedding,
|
||||
getProductEmbeddings,
|
||||
findSimilarTaxonomy,
|
||||
getSuggestionsForProduct,
|
||||
getTaxonomyData,
|
||||
|
||||
// Chat completions (Groq)
|
||||
runTask,
|
||||
hasChatCompletion,
|
||||
getGroqProvider,
|
||||
getPool,
|
||||
|
||||
// Constants
|
||||
TASK_IDS,
|
||||
GROQ_MODELS,
|
||||
|
||||
// Re-export utilities
|
||||
cosineSimilarity,
|
||||
findTopMatches
|
||||
};
|
||||
Reference in New Issue
Block a user