Add AI embeddings and suggestions for categories, a few validation step tweaks/fixes
inventory-server/src/routes/ai.js (new file, 281 lines)
@@ -0,0 +1,281 @@
/**
 * AI Routes
 *
 * API endpoints for AI-powered product validation features.
 * Provides embedding generation and similarity-based suggestions.
 */

const express = require('express');
const router = express.Router();
const aiService = require('../services/ai');
const { getDbConnection, closeAllConnections } = require('../utils/dbConnection');

// Track initialization state
let initializationPromise = null;

/**
 * Ensure AI service is initialized
 * Uses lazy initialization on first request
 */
async function ensureInitialized() {
  if (aiService.isReady()) {
    return true;
  }

  if (initializationPromise) {
    await initializationPromise;
    return aiService.isReady();
  }

  initializationPromise = (async () => {
    try {
      console.log('[AI Routes] Initializing AI service...');

      // Get database connection for taxonomy
      const { connection } = await getDbConnection();

      const result = await aiService.initialize({
        openaiApiKey: process.env.OPENAI_API_KEY,
        mysqlConnection: connection,
        logger: console
      });

      if (!result.success) {
        console.error('[AI Routes] AI service initialization failed:', result.message);
        return false;
      }

      console.log('[AI Routes] AI service initialized:', result.stats);
      return true;
    } catch (error) {
      console.error('[AI Routes] Failed to initialize AI service:', error);
      return false;
    }
  })();

  await initializationPromise;
  return aiService.isReady();
}

/**
 * GET /api/ai/status
 * Get AI service status
 */
router.get('/status', async (req, res) => {
  try {
    const status = aiService.getStatus();
    res.json(status);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/ai/initialize
 * Manually trigger initialization (also happens automatically on first use)
 */
router.post('/initialize', async (req, res) => {
  try {
    const ready = await ensureInitialized();
    const status = aiService.getStatus();

    res.json({
      success: ready,
      ...status
    });
  } catch (error) {
    console.error('[AI Routes] Initialize error:', error);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/ai/taxonomy
 * Get all taxonomy data (categories, themes, colors) without embeddings
 */
router.get('/taxonomy', async (req, res) => {
  try {
    const ready = await ensureInitialized();
    if (!ready) {
      return res.status(503).json({ error: 'AI service not available' });
    }

    const taxonomy = aiService.getTaxonomyData();
    res.json(taxonomy);
  } catch (error) {
    console.error('[AI Routes] Taxonomy error:', error);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/ai/embedding
 * Generate embedding for a single product
 *
 * Body: { product: { name, description, company_name, line_name } }
 * Returns: { embedding: number[], latencyMs: number }
 */
router.post('/embedding', async (req, res) => {
  try {
    const ready = await ensureInitialized();
    if (!ready) {
      return res.status(503).json({ error: 'AI service not available' });
    }

    const { product } = req.body;

    if (!product) {
      return res.status(400).json({ error: 'Product is required' });
    }

    const result = await aiService.getProductEmbedding(product);
    res.json(result);
  } catch (error) {
    console.error('[AI Routes] Embedding error:', error);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/ai/embeddings
 * Generate embeddings for multiple products
 *
 * Body: { products: Array<{ name, description, company_name, line_name }> }
 * Returns: { embeddings: Array<{ index, embedding }>, latencyMs }
 */
router.post('/embeddings', async (req, res) => {
  try {
    const ready = await ensureInitialized();
    if (!ready) {
      return res.status(503).json({ error: 'AI service not available' });
    }

    const { products } = req.body;

    if (!Array.isArray(products)) {
      return res.status(400).json({ error: 'Products array is required' });
    }

    const result = await aiService.getProductEmbeddings(products);
    res.json(result);
  } catch (error) {
    console.error('[AI Routes] Embeddings error:', error);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/ai/suggestions
 * Get category/theme/color suggestions for a single product
 * Generates embedding and finds similar taxonomy items
 *
 * Body: { product: { name, description, company_name, line_name }, options?: { topCategories, topThemes, topColors } }
 * Returns: { categories: Array, themes: Array, colors: Array, latencyMs }
 */
router.post('/suggestions', async (req, res) => {
  try {
    const ready = await ensureInitialized();
    if (!ready) {
      return res.status(503).json({ error: 'AI service not available' });
    }

    const { product, options } = req.body;

    if (!product) {
      return res.status(400).json({ error: 'Product is required' });
    }

    const suggestions = await aiService.getSuggestionsForProduct(product, options);
    res.json(suggestions);
  } catch (error) {
    console.error('[AI Routes] Suggestions error:', error);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/ai/suggestions/batch
 * Get suggestions for multiple products
 * More efficient than calling /suggestions multiple times
 *
 * Body: { products: Array, options?: { topCategories, topThemes, topColors } }
 * Returns: { results: Array<{ index, categories, themes, colors }>, latencyMs }
 */
router.post('/suggestions/batch', async (req, res) => {
  try {
    const ready = await ensureInitialized();
    if (!ready) {
      return res.status(503).json({ error: 'AI service not available' });
    }

    const { products, options } = req.body;

    if (!Array.isArray(products)) {
      return res.status(400).json({ error: 'Products array is required' });
    }

    const startTime = Date.now();

    // Generate all embeddings at once
    const { embeddings, latencyMs: embeddingLatency } = await aiService.getProductEmbeddings(products);

    // Find suggestions for each embedding
    const results = embeddings.map(({ index, embedding }) => {
      const suggestions = aiService.findSimilarTaxonomy(embedding, options);
      return {
        index,
        ...suggestions
      };
    });

    const totalLatency = Date.now() - startTime;

    res.json({
      results,
      latencyMs: totalLatency,
      embeddingLatencyMs: embeddingLatency,
      searchLatencyMs: totalLatency - embeddingLatency,
      productCount: products.length,
      embeddingCount: embeddings.length
    });
  } catch (error) {
    console.error('[AI Routes] Batch suggestions error:', error);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/ai/similar
 * Find similar taxonomy items given a pre-computed embedding
 * Useful when frontend has cached the embedding
 *
 * Body: { embedding: number[], options?: { topCategories, topThemes, topColors } }
 * Returns: { categories, themes, colors }
 */
router.post('/similar', async (req, res) => {
  try {
    const ready = await ensureInitialized();
    if (!ready) {
      return res.status(503).json({ error: 'AI service not available' });
    }

    const { embedding, options } = req.body;

    if (!embedding || !Array.isArray(embedding)) {
      return res.status(400).json({ error: 'Embedding array is required' });
    }

    const startTime = Date.now();
    const suggestions = aiService.findSimilarTaxonomy(embedding, options);

    res.json({
      ...suggestions,
      latencyMs: Date.now() - startTime
    });
  } catch (error) {
    console.error('[AI Routes] Similar error:', error);
    res.status(500).json({ error: error.message });
  }
});

module.exports = router;
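Reviewer note: a minimal sketch of how a client might exercise the new suggestions endpoint. The product fields mirror the JSDoc above; the localhost URL, port, and example values are assumptions for illustration and are not part of this commit.

// Inside an async function; Node 18+ (global fetch) assumed, values illustrative only
const res = await fetch('http://localhost:3000/api/ai/suggestions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    product: {
      name: 'Red ceramic mug',
      description: 'Hand-glazed stoneware mug',
      company_name: 'Acme',
      line_name: 'Kitchen'
    },
    options: { topCategories: 5, topThemes: 3, topColors: 3 }
  })
});
const { categories, themes, colors, latencyMs } = await res.json();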
@@ -15,6 +15,7 @@ const configRouter = require('./routes/config');
const metricsRouter = require('./routes/metrics');
const importRouter = require('./routes/import');
const aiValidationRouter = require('./routes/ai-validation');
const aiRouter = require('./routes/ai');
const templatesRouter = require('./routes/templates');
const aiPromptsRouter = require('./routes/ai-prompts');
const reusableImagesRouter = require('./routes/reusable-images');
@@ -124,6 +125,7 @@ async function startServer() {
  app.use('/api/brands-aggregate', brandsAggregateRouter);
  app.use('/api/import', importRouter);
  app.use('/api/ai-validation', aiValidationRouter);
  app.use('/api/ai', aiRouter);
  app.use('/api/templates', templatesRouter);
  app.use('/api/ai-prompts', aiPromptsRouter);
  app.use('/api/reusable-images', reusableImagesRouter);
inventory-server/src/services/ai/embeddings/similarity.js (new file, 82 lines)
@@ -0,0 +1,82 @@
/**
 * Vector similarity utilities
 */

/**
 * Compute cosine similarity between two vectors
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} Similarity score between -1 and 1
 */
function cosineSimilarity(a, b) {
  if (!a || !b || a.length !== b.length) {
    return 0;
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) return 0;

  return dotProduct / denominator;
}

/**
 * Find top K most similar items from a collection
 * @param {number[]} queryEmbedding - The embedding to search for
 * @param {Array<{id: any, embedding: number[]}>} items - Items with embeddings
 * @param {number} topK - Number of results to return
 * @returns {Array<{id: any, similarity: number}>}
 */
function findTopMatches(queryEmbedding, items, topK = 10) {
  if (!queryEmbedding || !items || items.length === 0) {
    return [];
  }

  const scored = items.map(item => ({
    id: item.id,
    similarity: cosineSimilarity(queryEmbedding, item.embedding)
  }));

  scored.sort((a, b) => b.similarity - a.similarity);

  return scored.slice(0, topK);
}

/**
 * Find matches above a similarity threshold
 * @param {number[]} queryEmbedding
 * @param {Array<{id: any, embedding: number[]}>} items
 * @param {number} threshold - Minimum similarity (0-1)
 * @returns {Array<{id: any, similarity: number}>}
 */
function findMatchesAboveThreshold(queryEmbedding, items, threshold = 0.5) {
  if (!queryEmbedding || !items || items.length === 0) {
    return [];
  }

  const scored = items
    .map(item => ({
      id: item.id,
      similarity: cosineSimilarity(queryEmbedding, item.embedding)
    }))
    .filter(item => item.similarity >= threshold);

  scored.sort((a, b) => b.similarity - a.similarity);

  return scored;
}

module.exports = {
  cosineSimilarity,
  findTopMatches,
  findMatchesAboveThreshold
};
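Reviewer note: a quick usage sketch of these helpers with toy 2-dimensional vectors; the values are chosen purely for illustration and are not part of this commit.

const { cosineSimilarity, findTopMatches } = require('./similarity');

// Parallel vectors score 1, orthogonal vectors score 0
console.log(cosineSimilarity([1, 0], [2, 0])); // 1
console.log(cosineSimilarity([1, 0], [0, 3])); // 0

// Rank a small in-memory collection against a query vector
const items = [
  { id: 'a', embedding: [1, 0] },
  { id: 'b', embedding: [0.7, 0.7] },
  { id: 'c', embedding: [0, 1] }
];
console.log(findTopMatches([1, 0.1], items, 2)); // ids 'a' then 'b'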
inventory-server/src/services/ai/embeddings/taxonomyEmbeddings.js (new file, 323 lines)
@@ -0,0 +1,323 @@
/**
 * Taxonomy Embedding Service
 *
 * Generates and caches embeddings for categories, themes, and colors.
 * Excludes "Black Friday", "Gifts", "Deals" categories and their children.
 */

const { findTopMatches } = require('./similarity');

// Categories to exclude (and all their children)
const EXCLUDED_CATEGORY_NAMES = ['black friday', 'gifts', 'deals'];

class TaxonomyEmbeddings {
  constructor({ provider, logger }) {
    this.provider = provider;
    this.logger = logger || console;

    // Cached taxonomy with embeddings
    this.categories = [];
    this.themes = [];
    this.colors = [];

    // Raw data without embeddings (for lookup)
    this.categoryMap = new Map();
    this.themeMap = new Map();
    this.colorMap = new Map();

    this.initialized = false;
    this.initializing = false;
  }

  /**
   * Initialize embeddings - fetch taxonomy and generate embeddings
   */
  async initialize(connection) {
    if (this.initialized) {
      return { categories: this.categories.length, themes: this.themes.length, colors: this.colors.length };
    }

    if (this.initializing) {
      // Wait for existing initialization
      while (this.initializing) {
        await new Promise(resolve => setTimeout(resolve, 100));
      }
      return { categories: this.categories.length, themes: this.themes.length, colors: this.colors.length };
    }

    this.initializing = true;

    try {
      this.logger.info('[TaxonomyEmbeddings] Starting initialization...');

      // Fetch raw taxonomy data
      const [categories, themes, colors] = await Promise.all([
        this._fetchCategories(connection),
        this._fetchThemes(connection),
        this._fetchColors(connection)
      ]);

      this.logger.info(`[TaxonomyEmbeddings] Fetched ${categories.length} categories, ${themes.length} themes, ${colors.length} colors`);

      // Generate embeddings in parallel
      const [catEmbeddings, themeEmbeddings, colorEmbeddings] = await Promise.all([
        this._generateEmbeddings(categories, 'categories'),
        this._generateEmbeddings(themes, 'themes'),
        this._generateEmbeddings(colors, 'colors')
      ]);

      // Store with embeddings
      this.categories = catEmbeddings;
      this.themes = themeEmbeddings;
      this.colors = colorEmbeddings;

      // Build lookup maps
      this.categoryMap = new Map(this.categories.map(c => [c.id, c]));
      this.themeMap = new Map(this.themes.map(t => [t.id, t]));
      this.colorMap = new Map(this.colors.map(c => [c.id, c]));

      this.initialized = true;
      this.logger.info('[TaxonomyEmbeddings] Initialization complete');

      return {
        categories: this.categories.length,
        themes: this.themes.length,
        colors: this.colors.length
      };
    } catch (error) {
      this.logger.error('[TaxonomyEmbeddings] Initialization failed:', error);
      throw error;
    } finally {
      this.initializing = false;
    }
  }

  /**
   * Find similar categories for a product embedding
   */
  findSimilarCategories(productEmbedding, topK = 10) {
    if (!this.initialized || !productEmbedding) {
      return [];
    }

    const matches = findTopMatches(productEmbedding, this.categories, topK);

    return matches.map(match => {
      const cat = this.categoryMap.get(match.id);
      return {
        id: match.id,
        name: cat?.name || '',
        fullPath: cat?.fullPath || '',
        similarity: match.similarity
      };
    });
  }

  /**
   * Find similar themes for a product embedding
   */
  findSimilarThemes(productEmbedding, topK = 5) {
    if (!this.initialized || !productEmbedding) {
      return [];
    }

    const matches = findTopMatches(productEmbedding, this.themes, topK);

    return matches.map(match => {
      const theme = this.themeMap.get(match.id);
      return {
        id: match.id,
        name: theme?.name || '',
        fullPath: theme?.fullPath || '',
        similarity: match.similarity
      };
    });
  }

  /**
   * Find similar colors for a product embedding
   */
  findSimilarColors(productEmbedding, topK = 5) {
    if (!this.initialized || !productEmbedding) {
      return [];
    }

    const matches = findTopMatches(productEmbedding, this.colors, topK);

    return matches.map(match => {
      const color = this.colorMap.get(match.id);
      return {
        id: match.id,
        name: color?.name || '',
        similarity: match.similarity
      };
    });
  }

  /**
   * Get all taxonomy data (without embeddings) for frontend
   */
  getTaxonomyData() {
    return {
      categories: this.categories.map(({ id, name, fullPath, parentId }) => ({ id, name, fullPath, parentId })),
      themes: this.themes.map(({ id, name, fullPath, parentId }) => ({ id, name, fullPath, parentId })),
      colors: this.colors.map(({ id, name }) => ({ id, name }))
    };
  }

  /**
   * Check if service is ready
   */
  isReady() {
    return this.initialized;
  }

  // ============================================================================
  // Private Methods
  // ============================================================================

  async _fetchCategories(connection) {
    // Fetch hierarchical categories (types 10-13)
    const [rows] = await connection.query(`
      SELECT cat_id, name, master_cat_id, type
      FROM product_categories
      WHERE type IN (10, 11, 12, 13)
      ORDER BY type, name
    `);

    // Build lookup for hierarchy
    const byId = new Map(rows.map(r => [r.cat_id, r]));

    // Find IDs of excluded top-level categories and all their descendants
    const excludedIds = new Set();

    // First pass: find excluded top-level categories
    for (const row of rows) {
      if (row.type === 10 && EXCLUDED_CATEGORY_NAMES.includes(row.name.toLowerCase())) {
        excludedIds.add(row.cat_id);
      }
    }

    // Multiple passes to find all descendants
    let foundNew = true;
    while (foundNew) {
      foundNew = false;
      for (const row of rows) {
        if (!excludedIds.has(row.cat_id) && excludedIds.has(row.master_cat_id)) {
          excludedIds.add(row.cat_id);
          foundNew = true;
        }
      }
    }

    this.logger.info(`[TaxonomyEmbeddings] Excluding ${excludedIds.size} categories (Black Friday, Gifts, Deals and children)`);

    // Build category objects with full paths, excluding filtered ones
    const categories = [];

    for (const row of rows) {
      if (excludedIds.has(row.cat_id)) {
        continue;
      }

      const path = [];
      let current = row;

      // Walk up the tree to build full path
      while (current) {
        path.unshift(current.name);
        current = current.master_cat_id ? byId.get(current.master_cat_id) : null;
      }

      categories.push({
        id: row.cat_id,
        name: row.name,
        parentId: row.master_cat_id,
        type: row.type,
        fullPath: path.join(' > '),
        embeddingText: path.join(' ')
      });
    }

    return categories;
  }

  async _fetchThemes(connection) {
    // Fetch themes (types 20-21)
    const [rows] = await connection.query(`
      SELECT cat_id, name, master_cat_id, type
      FROM product_categories
      WHERE type IN (20, 21)
      ORDER BY type, name
    `);

    const byId = new Map(rows.map(r => [r.cat_id, r]));
    const themes = [];

    for (const row of rows) {
      const path = [];
      let current = row;

      while (current) {
        path.unshift(current.name);
        current = current.master_cat_id ? byId.get(current.master_cat_id) : null;
      }

      themes.push({
        id: row.cat_id,
        name: row.name,
        parentId: row.master_cat_id,
        type: row.type,
        fullPath: path.join(' > '),
        embeddingText: path.join(' ')
      });
    }

    return themes;
  }

  async _fetchColors(connection) {
    const [rows] = await connection.query(`
      SELECT color, name, hex_color
      FROM product_color_list
      ORDER BY \`order\`
    `);

    return rows.map(row => ({
      id: row.color,
      name: row.name,
      hexColor: row.hex_color,
      embeddingText: row.name
    }));
  }

  async _generateEmbeddings(items, label) {
    if (items.length === 0) {
      return items;
    }

    const startTime = Date.now();
    const texts = items.map(item => item.embeddingText);
    const results = [...items];

    // Process in batches
    let batchNum = 0;
    for await (const chunk of this.provider.embedBatchChunked(texts, { batchSize: 100 })) {
      batchNum++;
      for (let i = 0; i < chunk.embeddings.length; i++) {
        const globalIndex = chunk.startIndex + i;
        results[globalIndex] = {
          ...results[globalIndex],
          embedding: chunk.embeddings[i]
        };
      }
    }

    const elapsed = Date.now() - startTime;
    this.logger.info(`[TaxonomyEmbeddings] Generated ${items.length} ${label} embeddings in ${elapsed}ms`);

    return results;
  }
}

module.exports = { TaxonomyEmbeddings };
inventory-server/src/services/ai/index.js (new file, 273 lines)
@@ -0,0 +1,273 @@
/**
 * AI Service
 *
 * Main entry point for AI functionality including embeddings.
 * Provides embedding generation and similarity search for product validation.
 */

const { OpenAIProvider } = require('./providers/openaiProvider');
const { TaxonomyEmbeddings } = require('./embeddings/taxonomyEmbeddings');
const { cosineSimilarity, findTopMatches } = require('./embeddings/similarity');

let initialized = false;
let initializing = false;
let openaiProvider = null;
let taxonomyEmbeddings = null;
let logger = console;

/**
 * Initialize the AI service
 * @param {Object} options
 * @param {string} options.openaiApiKey - OpenAI API key
 * @param {Object} options.mysqlConnection - MySQL connection for taxonomy data
 * @param {Object} [options.logger] - Logger instance
 */
async function initialize({ openaiApiKey, mysqlConnection, logger: customLogger }) {
  if (initialized) {
    return { success: true, message: 'Already initialized' };
  }

  if (initializing) {
    // Wait for existing initialization
    while (initializing) {
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    return { success: initialized, message: initialized ? 'Initialized' : 'Initialization failed' };
  }

  initializing = true;

  try {
    if (customLogger) {
      logger = customLogger;
    }

    if (!openaiApiKey) {
      throw new Error('OpenAI API key is required');
    }

    logger.info('[AI] Initializing AI service...');

    // Create OpenAI provider
    openaiProvider = new OpenAIProvider({ apiKey: openaiApiKey });

    // Create and initialize taxonomy embeddings
    taxonomyEmbeddings = new TaxonomyEmbeddings({
      provider: openaiProvider,
      logger
    });

    const stats = await taxonomyEmbeddings.initialize(mysqlConnection);

    initialized = true;
    logger.info('[AI] AI service initialized', stats);

    return {
      success: true,
      message: 'Initialized',
      stats
    };
  } catch (error) {
    logger.error('[AI] Initialization failed:', error);
    return { success: false, message: error.message };
  } finally {
    initializing = false;
  }
}

/**
 * Check if service is ready
 */
function isReady() {
  return initialized && taxonomyEmbeddings?.isReady();
}

/**
 * Build weighted product text for embedding.
 * Weights the product name heavily by repeating it, and truncates long descriptions
 * to prevent verbose marketing copy from drowning out the product signal.
 *
 * @param {Object} product - Product with name, description, company, line
 * @returns {string} - Combined text for embedding
 */
function buildProductText(product) {
  const parts = [];
  const name = product.name?.trim();
  const description = product.description?.trim();
  const company = (product.company_name || product.company)?.trim();
  const line = (product.line_name || product.line)?.trim();

  // Name is most important - repeat 3x to weight it heavily in the embedding
  if (name) {
    parts.push(name, name, name);
  }

  // Company and line provide context
  if (company) {
    parts.push(company);
  }
  if (line) {
    parts.push(line);
  }

  // Truncate description to prevent it from overwhelming the signal
  if (description) {
    const truncated = description.length > 500
      ? description.substring(0, 500) + '...'
      : description;
    parts.push(truncated);
  }

  return parts.join(' ').trim();
}

/**
 * Generate embedding for a product
 * @param {Object} product - Product with name, description, company, line
 * @returns {Promise<{embedding: number[], latencyMs: number}>}
 */
async function getProductEmbedding(product) {
  if (!initialized || !openaiProvider) {
    throw new Error('AI service not initialized');
  }

  const text = buildProductText(product);

  if (!text) {
    return { embedding: null, latencyMs: 0 };
  }

  const result = await openaiProvider.embed(text);

  return {
    embedding: result.embeddings[0],
    latencyMs: result.latencyMs
  };
}

/**
 * Generate embeddings for multiple products
 * @param {Object[]} products - Array of products
 * @returns {Promise<{embeddings: Array<{index: number, embedding: number[]}>, latencyMs: number}>}
 */
async function getProductEmbeddings(products) {
  if (!initialized || !openaiProvider) {
    throw new Error('AI service not initialized');
  }

  const texts = products.map(buildProductText);

  // Track which products have empty text
  const validIndices = texts.map((t, i) => t ? i : -1).filter(i => i >= 0);
  const validTexts = texts.filter(t => t);

  if (validTexts.length === 0) {
    return { embeddings: [], latencyMs: 0 };
  }

  const result = await openaiProvider.embed(validTexts);

  // Map embeddings back to original indices
  const embeddings = validIndices.map((originalIndex, resultIndex) => ({
    index: originalIndex,
    embedding: result.embeddings[resultIndex]
  }));

  return {
    embeddings,
    latencyMs: result.latencyMs
  };
}

/**
 * Find similar taxonomy items for a product embedding
 * @param {number[]} productEmbedding
 * @param {Object} options
 * @returns {{categories: Array, themes: Array, colors: Array}}
 */
function findSimilarTaxonomy(productEmbedding, options = {}) {
  if (!initialized || !taxonomyEmbeddings) {
    throw new Error('AI service not initialized');
  }

  const topCategories = options.topCategories ?? 10;
  const topThemes = options.topThemes ?? 5;
  const topColors = options.topColors ?? 5;

  return {
    categories: taxonomyEmbeddings.findSimilarCategories(productEmbedding, topCategories),
    themes: taxonomyEmbeddings.findSimilarThemes(productEmbedding, topThemes),
    colors: taxonomyEmbeddings.findSimilarColors(productEmbedding, topColors)
  };
}

/**
 * Get product embedding and find similar taxonomy in one call
 * @param {Object} product
 * @param {Object} options
 */
async function getSuggestionsForProduct(product, options = {}) {
  const { embedding, latencyMs: embeddingLatency } = await getProductEmbedding(product);

  if (!embedding) {
    return {
      categories: [],
      themes: [],
      colors: [],
      latencyMs: embeddingLatency
    };
  }

  const startSearch = Date.now();
  const suggestions = findSimilarTaxonomy(embedding, options);
  const searchLatency = Date.now() - startSearch;

  return {
    ...suggestions,
    latencyMs: embeddingLatency + searchLatency,
    embeddingLatencyMs: embeddingLatency,
    searchLatencyMs: searchLatency
  };
}

/**
 * Get all taxonomy data (without embeddings) for frontend
 */
function getTaxonomyData() {
  if (!initialized || !taxonomyEmbeddings) {
    throw new Error('AI service not initialized');
  }

  return taxonomyEmbeddings.getTaxonomyData();
}

/**
 * Get service status
 */
function getStatus() {
  return {
    initialized,
    ready: isReady(),
    hasProvider: !!openaiProvider,
    hasTaxonomy: !!taxonomyEmbeddings,
    taxonomyStats: taxonomyEmbeddings ? {
      categories: taxonomyEmbeddings.categories?.length || 0,
      themes: taxonomyEmbeddings.themes?.length || 0,
      colors: taxonomyEmbeddings.colors?.length || 0
    } : null
  };
}

module.exports = {
  initialize,
  isReady,
  getProductEmbedding,
  getProductEmbeddings,
  findSimilarTaxonomy,
  getSuggestionsForProduct,
  getTaxonomyData,
  getStatus,
  // Re-export utilities
  cosineSimilarity,
  findTopMatches
};
inventory-server/src/services/ai/providers/openaiProvider.js (new file, 117 lines)
@@ -0,0 +1,117 @@
/**
 * OpenAI Provider - Handles embedding generation
 */

const EMBEDDING_MODEL = 'text-embedding-3-small';
const EMBEDDING_DIMENSIONS = 1536;
const MAX_BATCH_SIZE = 2048;

class OpenAIProvider {
  constructor({ apiKey, baseUrl = 'https://api.openai.com/v1', timeoutMs = 60000 }) {
    if (!apiKey) {
      throw new Error('OpenAI API key is required');
    }
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.timeoutMs = timeoutMs;
  }

  /**
   * Generate embeddings for one or more texts
   * @param {string|string[]} input - Text or array of texts
   * @param {Object} options
   * @returns {Promise<{embeddings: number[][], usage: Object, model: string, latencyMs: number}>}
   */
  async embed(input, options = {}) {
    const texts = Array.isArray(input) ? input : [input];
    const model = options.model || EMBEDDING_MODEL;
    const dimensions = options.dimensions || EMBEDDING_DIMENSIONS;
    const timeoutMs = options.timeoutMs || this.timeoutMs;

    if (texts.length > MAX_BATCH_SIZE) {
      throw new Error(`Batch size ${texts.length} exceeds max of ${MAX_BATCH_SIZE}`);
    }

    const started = Date.now();

    // Clean and truncate input texts
    const cleanedTexts = texts.map(t =>
      (t || '').replace(/\n+/g, ' ').trim().substring(0, 8000)
    );

    const body = {
      input: cleanedTexts,
      model,
      encoding_format: 'float'
    };

    // Only embedding-3 models support dimensions parameter
    if (model.includes('embedding-3')) {
      body.dimensions = dimensions;
    }

    const response = await this._makeRequest('embeddings', body, timeoutMs);

    // Sort by index to ensure order matches input
    const sortedData = response.data.sort((a, b) => a.index - b.index);

    return {
      embeddings: sortedData.map(item => item.embedding),
      usage: {
        promptTokens: response.usage?.prompt_tokens || 0,
        totalTokens: response.usage?.total_tokens || 0
      },
      model: response.model || model,
      latencyMs: Date.now() - started
    };
  }

  /**
   * Generator for processing large batches in chunks
   */
  async *embedBatchChunked(texts, options = {}) {
    const batchSize = Math.min(options.batchSize || 100, MAX_BATCH_SIZE);

    for (let i = 0; i < texts.length; i += batchSize) {
      const chunk = texts.slice(i, i + batchSize);
      const result = await this.embed(chunk, options);

      yield {
        embeddings: result.embeddings,
        startIndex: i,
        endIndex: i + chunk.length,
        usage: result.usage,
        model: result.model,
        latencyMs: result.latencyMs
      };
    }
  }

  async _makeRequest(endpoint, body, timeoutMs) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), timeoutMs);

    try {
      const response = await fetch(`${this.baseUrl}/${endpoint}`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`
        },
        body: JSON.stringify(body),
        signal: controller.signal
      });

      if (!response.ok) {
        const error = await response.json().catch(() => ({}));
        throw new Error(error.error?.message || `OpenAI API error: ${response.status}`);
      }

      return response.json();
    } finally {
      clearTimeout(timeout);
    }
  }
}

module.exports = { OpenAIProvider, EMBEDDING_MODEL, EMBEDDING_DIMENSIONS };
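Reviewer note: a minimal sketch of calling the provider directly and comparing two ad-hoc strings with the similarity helper. It assumes Node 18+ (global fetch) and an OPENAI_API_KEY environment variable, and is not part of this commit.

const { OpenAIProvider } = require('./openaiProvider');
const { cosineSimilarity } = require('../embeddings/similarity');

async function compareTexts() {
  const provider = new OpenAIProvider({ apiKey: process.env.OPENAI_API_KEY });

  // One request for both strings; embeddings come back in input order
  const { embeddings, latencyMs } = await provider.embed(['red ceramic mug', 'blue coffee cup']);
  console.log('similarity:', cosineSimilarity(embeddings[0], embeddings[1]), `took ${latencyMs}ms`);
}

compareTexts().catch(console.error);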