Restore accidentally removed files, a few forecast tweaks
This commit is contained in:
@@ -0,0 +1,323 @@
|
||||
/**
 * Taxonomy Embedding Service
 *
 * Generates and caches embeddings for categories, themes, and colors.
 * Excludes the "Black Friday", "Gifts", and "Deals" categories and all of their children.
 */
|
||||
|
||||
const { findTopMatches } = require('./similarity');
|
||||
|
||||
// Names of categories to exclude (together with all of their children).
// Entries are lower-case because _fetchCategories compares them against the
// lower-cased name of each type-10 row.
|
||||
const EXCLUDED_CATEGORY_NAMES = ['black friday', 'gifts', 'deals'];
|
||||
|
||||
/**
 * Loads the category/theme/color taxonomy from the database, generates an
 * embedding for every entry through the injected provider, and caches the
 * result in memory so product embeddings can be matched against it.
 */
class TaxonomyEmbeddings {
  /**
   * @param {object} deps
   * @param {object} deps.provider - Embedding provider. Must expose
   *   `embedBatchChunked(texts, { batchSize })` as an async iterable yielding
   *   `{ startIndex, embeddings }` chunks.
   * @param {object} [deps.logger] - Logger with `info` and `error` (and
   *   optionally `warn`). Defaults to `console`.
   */
  constructor({ provider, logger }) {
    this.provider = provider;
    this.logger = logger || console;

    // Cached taxonomy entries, each carrying its embedding
    this.categories = [];
    this.themes = [];
    this.colors = [];

    // id -> entry lookups, rebuilt on every successful initialization
    this.categoryMap = new Map();
    this.themeMap = new Map();
    this.colorMap = new Map();

    this.initialized = false;
    this.initializing = false;

    // Promise of the initialization currently in flight (null when idle)
    this._initPromise = null;
  }

  /**
   * Initialize embeddings: fetch the taxonomy and generate embeddings.
   *
   * Once initialization has succeeded, later calls return the cached counts
   * immediately. Calls made while an initialization is in flight share that
   * run and settle with its outcome, so a failure reaches every waiting
   * caller instead of being reported as an empty taxonomy. After a failure
   * the next call starts a fresh attempt.
   *
   * @param {object} connection - Database handle whose `query(sql)` resolves
   *   to an array with the result rows as its first element.
   * @returns {Promise<{categories: number, themes: number, colors: number}>}
   * @throws Rethrows any error raised while fetching or embedding.
   */
  async initialize(connection) {
    if (this.initialized) {
      return this._counts();
    }

    if (!this._initPromise) {
      this.initializing = true;
      this._initPromise = this._runInitialization(connection).finally(() => {
        this.initializing = false;
        this._initPromise = null;
      });
    }

    return this._initPromise;
  }

  /**
   * Find similar categories for a product embedding.
   *
   * @param {*} productEmbedding - Embedding handed to `findTopMatches`.
   * @param {number} [topK=10] - Maximum number of matches requested.
   * @returns {Array<{id: *, name: string, fullPath: string, similarity: *}>}
   *   Empty when the service is not initialized or no embedding is given.
   */
  findSimilarCategories(productEmbedding, topK = 10) {
    return this._findSimilar(productEmbedding, this.categories, this.categoryMap, topK, true);
  }

  /**
   * Find similar themes for a product embedding.
   *
   * @param {*} productEmbedding - Embedding handed to `findTopMatches`.
   * @param {number} [topK=5] - Maximum number of matches requested.
   * @returns {Array<{id: *, name: string, fullPath: string, similarity: *}>}
   *   Empty when the service is not initialized or no embedding is given.
   */
  findSimilarThemes(productEmbedding, topK = 5) {
    return this._findSimilar(productEmbedding, this.themes, this.themeMap, topK, true);
  }

  /**
   * Find similar colors for a product embedding.
   *
   * @param {*} productEmbedding - Embedding handed to `findTopMatches`.
   * @param {number} [topK=5] - Maximum number of matches requested.
   * @returns {Array<{id: *, name: string, similarity: *}>}
   *   Empty when the service is not initialized or no embedding is given.
   */
  findSimilarColors(productEmbedding, topK = 5) {
    return this._findSimilar(productEmbedding, this.colors, this.colorMap, topK, false);
  }

  /**
   * Get all taxonomy data (without embeddings) for the frontend.
   *
   * @returns {{categories: object[], themes: object[], colors: object[]}}
   */
  getTaxonomyData() {
    const withHierarchy = ({ id, name, fullPath, parentId }) => ({ id, name, fullPath, parentId });

    return {
      categories: this.categories.map(withHierarchy),
      themes: this.themes.map(withHierarchy),
      colors: this.colors.map(({ id, name }) => ({ id, name })),
    };
  }

  /**
   * Check if the service is ready.
   *
   * @returns {boolean} True once an initialization has completed successfully.
   */
  isReady() {
    return this.initialized;
  }

  // ============================================================================
  // Private Methods
  // ============================================================================

  /** Current number of cached categories, themes and colors. */
  _counts() {
    return {
      categories: this.categories.length,
      themes: this.themes.length,
      colors: this.colors.length,
    };
  }

  /** Performs one full fetch-and-embed pass and publishes the result on success. */
  async _runInitialization(connection) {
    try {
      this.logger.info('[TaxonomyEmbeddings] Starting initialization...');

      // Fetch raw taxonomy data
      const [categories, themes, colors] = await Promise.all([
        this._fetchCategories(connection),
        this._fetchThemes(connection),
        this._fetchColors(connection),
      ]);

      this.logger.info(`[TaxonomyEmbeddings] Fetched ${categories.length} categories, ${themes.length} themes, ${colors.length} colors`);

      // Generate embeddings in parallel
      const [catEmbeddings, themeEmbeddings, colorEmbeddings] = await Promise.all([
        this._generateEmbeddings(categories, 'categories'),
        this._generateEmbeddings(themes, 'themes'),
        this._generateEmbeddings(colors, 'colors'),
      ]);

      // State is only replaced after every step succeeded, so a failed run
      // leaves the previous (empty) cache untouched.
      this.categories = catEmbeddings;
      this.themes = themeEmbeddings;
      this.colors = colorEmbeddings;

      this.categoryMap = new Map(this.categories.map((c) => [c.id, c]));
      this.themeMap = new Map(this.themes.map((t) => [t.id, t]));
      this.colorMap = new Map(this.colors.map((c) => [c.id, c]));

      this.initialized = true;
      this.logger.info('[TaxonomyEmbeddings] Initialization complete');

      return this._counts();
    } catch (error) {
      this.logger.error('[TaxonomyEmbeddings] Initialization failed:', error);
      throw error;
    }
  }

  /** Shared matcher behind the public findSimilar* methods. */
  _findSimilar(productEmbedding, items, lookup, topK, includePath) {
    if (!this.initialized || !productEmbedding) {
      return [];
    }

    return findTopMatches(productEmbedding, items, topK).map(({ id, similarity }) => {
      const entry = lookup.get(id);
      const result = { id, name: entry?.name || '' };
      if (includePath) {
        result.fullPath = entry?.fullPath || '';
      }
      result.similarity = similarity;
      return result;
    });
  }

  /**
   * Collects the names from the root down to `row` by following
   * `master_cat_id` links through `byId`. The walk stops at a row without a
   * parent, at a parent missing from `byId`, or when a row is revisited, so
   * cyclic parent links cannot loop forever.
   */
  _buildPath(row, byId) {
    const names = [];
    const visited = new Set();
    let current = row;

    while (current && !visited.has(current.cat_id)) {
      visited.add(current.cat_id);
      names.unshift(current.name);
      current = current.master_cat_id ? byId.get(current.master_cat_id) : null;
    }

    return names;
  }

  /** Converts a `product_categories` row into a taxonomy entry (no embedding yet). */
  _toTaxonomyNode(row, byId) {
    const path = this._buildPath(row, byId);

    return {
      id: row.cat_id,
      name: row.name,
      parentId: row.master_cat_id,
      type: row.type,
      fullPath: path.join(' > '),
      embeddingText: path.join(' '),
    };
  }

  /**
   * Fetches hierarchical categories (types 10-13) and drops every type-10
   * category named in EXCLUDED_CATEGORY_NAMES together with its descendants.
   */
  async _fetchCategories(connection) {
    const [rows] = await connection.query(`
      SELECT cat_id, name, master_cat_id, type
      FROM product_categories
      WHERE type IN (10, 11, 12, 13)
      ORDER BY type, name
    `);

    const byId = new Map();
    const childrenOf = new Map();
    for (const row of rows) {
      byId.set(row.cat_id, row);
      const siblings = childrenOf.get(row.master_cat_id);
      if (siblings) {
        siblings.push(row.cat_id);
      } else {
        childrenOf.set(row.master_cat_id, [row.cat_id]);
      }
    }

    // Seed with the excluded top-level categories; a row whose name is not a
    // string (e.g. NULL) can never match the exclusion list.
    const pending = rows
      .filter((row) => row.type === 10
        && typeof row.name === 'string'
        && EXCLUDED_CATEGORY_NAMES.includes(row.name.toLowerCase()))
      .map((row) => row.cat_id);

    // Expand to all descendants; the membership check keeps this finite even
    // if parent links are cyclic.
    const excludedIds = new Set();
    while (pending.length > 0) {
      const id = pending.pop();
      if (excludedIds.has(id)) {
        continue;
      }
      excludedIds.add(id);
      pending.push(...(childrenOf.get(id) || []));
    }

    this.logger.info(`[TaxonomyEmbeddings] Excluding ${excludedIds.size} categories (Black Friday, Gifts, Deals and children)`);

    return rows
      .filter((row) => !excludedIds.has(row.cat_id))
      .map((row) => this._toTaxonomyNode(row, byId));
  }

  /** Fetches themes (types 20-21) with their full paths. */
  async _fetchThemes(connection) {
    const [rows] = await connection.query(`
      SELECT cat_id, name, master_cat_id, type
      FROM product_categories
      WHERE type IN (20, 21)
      ORDER BY type, name
    `);

    const byId = new Map(rows.map((row) => [row.cat_id, row]));
    return rows.map((row) => this._toTaxonomyNode(row, byId));
  }

  /** Fetches the color list; the color name doubles as the embedding text. */
  async _fetchColors(connection) {
    const [rows] = await connection.query(`
      SELECT color, name, hex_color
      FROM product_color_list
      ORDER BY \`order\`
    `);

    return rows.map((row) => ({
      id: row.color,
      name: row.name,
      hexColor: row.hex_color,
      embeddingText: row.name,
    }));
  }

  /**
   * Attaches an `embedding` to each item using the provider's chunked API.
   *
   * @param {object[]} items - Entries with an `embeddingText` property.
   * @param {string} label - Name used in log messages.
   * @param {number} [batchSize=100] - Batch size passed to the provider.
   * @returns {Promise<object[]>} A new array in the same order. Items that
   *   received an embedding are shallow copies; the input is not mutated. An
   *   empty input is returned as-is.
   */
  async _generateEmbeddings(items, label, batchSize = 100) {
    if (items.length === 0) {
      return items;
    }

    const startTime = Date.now();
    const texts = items.map((item) => item.embeddingText);
    const results = [...items];

    for await (const { startIndex, embeddings } of this.provider.embedBatchChunked(texts, { batchSize })) {
      for (let offset = 0; offset < embeddings.length; offset++) {
        const index = startIndex + offset;
        // Ignore positions outside the input so a misbehaving provider cannot
        // append entries that have no id.
        if (index < 0 || index >= results.length) {
          continue;
        }
        results[index] = { ...results[index], embedding: embeddings[offset] };
      }
    }

    const missing = results.filter((item) => item.embedding == null).length;
    if (missing > 0) {
      // Loggers without `warn` still get the message through `info`.
      const warn = typeof this.logger.warn === 'function' ? this.logger.warn : this.logger.info;
      warn.call(this.logger, `[TaxonomyEmbeddings] ${missing} of ${items.length} ${label} received no embedding`);
    }

    const elapsed = Date.now() - startTime;
    this.logger.info(`[TaxonomyEmbeddings] Generated ${items.length} ${label} embeddings in ${elapsed}ms`);

    return results;
  }
}
|
||||
|
||||
module.exports = { TaxonomyEmbeddings };
|
||||
Reference in New Issue
Block a user