Add AI embeddings and suggestions for categories, a few validation step tweaks/fixes

This commit is contained in:
2026-01-19 11:34:55 -05:00
parent 9ce84fe5b9
commit 43d76e011d
20 changed files with 5311 additions and 176 deletions

View File

@@ -0,0 +1,273 @@
/**
* AI Service
*
* Main entry point for AI functionality including embeddings.
* Provides embedding generation and similarity search for product validation.
*/
const { OpenAIProvider } = require('./providers/openaiProvider');
const { TaxonomyEmbeddings } = require('./embeddings/taxonomyEmbeddings');
const { cosineSimilarity, findTopMatches } = require('./embeddings/similarity');
let initialized = false;
let initializing = false;
let openaiProvider = null;
let taxonomyEmbeddings = null;
let logger = console;
/**
* Initialize the AI service
* @param {Object} options
* @param {string} options.openaiApiKey - OpenAI API key
* @param {Object} options.mysqlConnection - MySQL connection for taxonomy data
* @param {Object} [options.logger] - Logger instance
*/
async function initialize({ openaiApiKey, mysqlConnection, logger: customLogger }) {
if (initialized) {
return { success: true, message: 'Already initialized' };
}
if (initializing) {
// Wait for existing initialization
while (initializing) {
await new Promise(resolve => setTimeout(resolve, 100));
}
return { success: initialized, message: initialized ? 'Initialized' : 'Initialization failed' };
}
initializing = true;
try {
if (customLogger) {
logger = customLogger;
}
if (!openaiApiKey) {
throw new Error('OpenAI API key is required');
}
logger.info('[AI] Initializing AI service...');
// Create OpenAI provider
openaiProvider = new OpenAIProvider({ apiKey: openaiApiKey });
// Create and initialize taxonomy embeddings
taxonomyEmbeddings = new TaxonomyEmbeddings({
provider: openaiProvider,
logger
});
const stats = await taxonomyEmbeddings.initialize(mysqlConnection);
initialized = true;
logger.info('[AI] AI service initialized', stats);
return {
success: true,
message: 'Initialized',
stats
};
} catch (error) {
logger.error('[AI] Initialization failed:', error);
return { success: false, message: error.message };
} finally {
initializing = false;
}
}
/**
* Check if service is ready
*/
function isReady() {
return initialized && taxonomyEmbeddings?.isReady();
}
/**
* Build weighted product text for embedding.
* Weights the product name heavily by repeating it, and truncates long descriptions
* to prevent verbose marketing copy from drowning out the product signal.
*
* @param {Object} product - Product with name, description, company, line
* @returns {string} - Combined text for embedding
*/
function buildProductText(product) {
const parts = [];
const name = product.name?.trim();
const description = product.description?.trim();
const company = (product.company_name || product.company)?.trim();
const line = (product.line_name || product.line)?.trim();
// Name is most important - repeat 3x to weight it heavily in the embedding
if (name) {
parts.push(name, name, name);
}
// Company and line provide context
if (company) {
parts.push(company);
}
if (line) {
parts.push(line);
}
// Truncate description to prevent it from overwhelming the signal
if (description) {
const truncated = description.length > 500
? description.substring(0, 500) + '...'
: description;
parts.push(truncated);
}
return parts.join(' ').trim();
}
/**
* Generate embedding for a product
* @param {Object} product - Product with name, description, company, line
* @returns {Promise<{embedding: number[], latencyMs: number}>}
*/
async function getProductEmbedding(product) {
if (!initialized || !openaiProvider) {
throw new Error('AI service not initialized');
}
const text = buildProductText(product);
if (!text) {
return { embedding: null, latencyMs: 0 };
}
const result = await openaiProvider.embed(text);
return {
embedding: result.embeddings[0],
latencyMs: result.latencyMs
};
}
/**
* Generate embeddings for multiple products
* @param {Object[]} products - Array of products
* @returns {Promise<{embeddings: Array<{index: number, embedding: number[]}>, latencyMs: number}>}
*/
async function getProductEmbeddings(products) {
if (!initialized || !openaiProvider) {
throw new Error('AI service not initialized');
}
const texts = products.map(buildProductText);
// Track which products have empty text
const validIndices = texts.map((t, i) => t ? i : -1).filter(i => i >= 0);
const validTexts = texts.filter(t => t);
if (validTexts.length === 0) {
return { embeddings: [], latencyMs: 0 };
}
const result = await openaiProvider.embed(validTexts);
// Map embeddings back to original indices
const embeddings = validIndices.map((originalIndex, resultIndex) => ({
index: originalIndex,
embedding: result.embeddings[resultIndex]
}));
return {
embeddings,
latencyMs: result.latencyMs
};
}
/**
* Find similar taxonomy items for a product embedding
* @param {number[]} productEmbedding
* @param {Object} options
* @returns {{categories: Array, themes: Array, colors: Array}}
*/
function findSimilarTaxonomy(productEmbedding, options = {}) {
if (!initialized || !taxonomyEmbeddings) {
throw new Error('AI service not initialized');
}
const topCategories = options.topCategories ?? 10;
const topThemes = options.topThemes ?? 5;
const topColors = options.topColors ?? 5;
return {
categories: taxonomyEmbeddings.findSimilarCategories(productEmbedding, topCategories),
themes: taxonomyEmbeddings.findSimilarThemes(productEmbedding, topThemes),
colors: taxonomyEmbeddings.findSimilarColors(productEmbedding, topColors)
};
}
/**
* Get product embedding and find similar taxonomy in one call
* @param {Object} product
* @param {Object} options
*/
async function getSuggestionsForProduct(product, options = {}) {
const { embedding, latencyMs: embeddingLatency } = await getProductEmbedding(product);
if (!embedding) {
return {
categories: [],
themes: [],
colors: [],
latencyMs: embeddingLatency
};
}
const startSearch = Date.now();
const suggestions = findSimilarTaxonomy(embedding, options);
const searchLatency = Date.now() - startSearch;
return {
...suggestions,
latencyMs: embeddingLatency + searchLatency,
embeddingLatencyMs: embeddingLatency,
searchLatencyMs: searchLatency
};
}
/**
* Get all taxonomy data (without embeddings) for frontend
*/
function getTaxonomyData() {
if (!initialized || !taxonomyEmbeddings) {
throw new Error('AI service not initialized');
}
return taxonomyEmbeddings.getTaxonomyData();
}
/**
* Get service status
*/
function getStatus() {
return {
initialized,
ready: isReady(),
hasProvider: !!openaiProvider,
hasTaxonomy: !!taxonomyEmbeddings,
taxonomyStats: taxonomyEmbeddings ? {
categories: taxonomyEmbeddings.categories?.length || 0,
themes: taxonomyEmbeddings.themes?.length || 0,
colors: taxonomyEmbeddings.colors?.length || 0
} : null
};
}
module.exports = {
initialize,
isReady,
getProductEmbedding,
getProductEmbeddings,
findSimilarTaxonomy,
getSuggestionsForProduct,
getTaxonomyData,
getStatus,
// Re-export utilities
cosineSimilarity,
findTopMatches
};