diff --git a/.gitignore b/.gitignore index df5a263..6bc1138 100644 --- a/.gitignore +++ b/.gitignore @@ -84,4 +84,6 @@ chat-migration*/** venv/ venv/** **/venv/* -**/venv/** \ No newline at end of file +**/venv/** + +inventory-server/data/taxonomy-embeddings.json \ No newline at end of file diff --git a/inventory-server/src/routes/ai.js b/inventory-server/src/routes/ai.js index 2f3dcab..04771d4 100644 --- a/inventory-server/src/routes/ai.js +++ b/inventory-server/src/routes/ai.js @@ -51,6 +51,10 @@ async function ensureInitialized() { ...result.stats, groqEnabled: result.groqEnabled }); + + // Watch for taxonomy changes in the background (checks every hour) + aiService.startBackgroundCheck(getDbConnection); + return true; } catch (error) { console.error('[AI Routes] Failed to initialize AI service:', error); @@ -431,4 +435,16 @@ router.post('/validate/sanity-check', async (req, res) => { } }); +/** + * Kick off AI initialization in the background (no-op if already initialized). + * Call once from server startup so the taxonomy embeddings are ready before + * the first user request hits a taxonomy dropdown. + */ +function initInBackground() { + ensureInitialized().catch(err => + console.error('[AI Routes] Background initialization failed:', err) + ); +} + module.exports = router; +module.exports.initInBackground = initInBackground; diff --git a/inventory-server/src/server.js b/inventory-server/src/server.js index 2e496d5..d12d749 100644 --- a/inventory-server/src/server.js +++ b/inventory-server/src/server.js @@ -162,6 +162,8 @@ async function startServer() { const PORT = process.env.PORT || 3000; app.listen(PORT, () => { console.log(`[Server] Running in ${process.env.NODE_ENV || 'development'} mode on port ${PORT}`); + // Pre-warm AI service so taxonomy embeddings are ready before first user request + aiRouter.initInBackground(); }); } catch (error) { console.error('Failed to start server:', error); diff --git a/inventory-server/src/services/ai/embeddings/taxonomyEmbeddings.js b/inventory-server/src/services/ai/embeddings/taxonomyEmbeddings.js index bacf55a..5526f14 100644 --- a/inventory-server/src/services/ai/embeddings/taxonomyEmbeddings.js +++ b/inventory-server/src/services/ai/embeddings/taxonomyEmbeddings.js @@ -3,13 +3,26 @@ * * Generates and caches embeddings for categories, themes, and colors. * Excludes "Black Friday", "Gifts", "Deals" categories and their children. + * + * Disk cache: embeddings are saved to data/taxonomy-embeddings.json and reused + * across server restarts. Cache is invalidated by content hash — if the taxonomy + * rows in MySQL change, the next check will detect it and regenerate automatically. + * + * Background check: after initialization, call startBackgroundCheck(getConnectionFn) + * to poll for taxonomy changes on a configurable interval (default 1h). */ +const fs = require('fs'); +const path = require('path'); +const crypto = require('crypto'); const { findTopMatches } = require('./similarity'); // Categories to exclude (and all their children) const EXCLUDED_CATEGORY_NAMES = ['black friday', 'gifts', 'deals']; +// Disk cache config +const CACHE_PATH = path.join(__dirname, '..', '..', '..', '..', 'data', 'taxonomy-embeddings.json'); + class TaxonomyEmbeddings { constructor({ provider, logger }) { this.provider = provider; @@ -25,12 +38,18 @@ class TaxonomyEmbeddings { this.themeMap = new Map(); this.colorMap = new Map(); + // Content hash of the last successfully built taxonomy (from DB rows) + this.contentHash = null; + this.initialized = false; this.initializing = false; + this._checkInterval = null; + this._regenerating = false; } /** - * Initialize embeddings - fetch taxonomy and generate embeddings + * Initialize embeddings — fetches raw taxonomy rows to compute a content hash, + * then either loads the matching disk cache or generates fresh embeddings. */ async initialize(connection) { if (this.initialized) { @@ -48,42 +67,36 @@ class TaxonomyEmbeddings { this.initializing = true; try { - this.logger.info('[TaxonomyEmbeddings] Starting initialization...'); + // Always fetch raw rows first — cheap (~10ms), no OpenAI calls. + // Used to compute a content hash for cache validation. + const rawRows = await this._fetchRawRows(connection); + const freshHash = this._computeContentHash(rawRows); - // Fetch raw taxonomy data - const [categories, themes, colors] = await Promise.all([ - this._fetchCategories(connection), - this._fetchThemes(connection), - this._fetchColors(connection) - ]); + const cached = this._loadCache(); + if (cached && cached.contentHash === freshHash) { + this.categories = cached.categories; + this.themes = cached.themes; + this.colors = cached.colors; + this.categoryMap = new Map(this.categories.map(c => [c.id, c])); + this.themeMap = new Map(this.themes.map(t => [t.id, t])); + this.colorMap = new Map(this.colors.map(c => [c.id, c])); + this.contentHash = freshHash; + this.initialized = true; + this.logger.info(`[TaxonomyEmbeddings] Loaded from cache: ${this.categories.length} categories, ${this.themes.length} themes, ${this.colors.length} colors`); + return { categories: this.categories.length, themes: this.themes.length, colors: this.colors.length }; + } - this.logger.info(`[TaxonomyEmbeddings] Fetched ${categories.length} categories, ${themes.length} themes, ${colors.length} colors`); - - // Generate embeddings in parallel - const [catEmbeddings, themeEmbeddings, colorEmbeddings] = await Promise.all([ - this._generateEmbeddings(categories, 'categories'), - this._generateEmbeddings(themes, 'themes'), - this._generateEmbeddings(colors, 'colors') - ]); - - // Store with embeddings - this.categories = catEmbeddings; - this.themes = themeEmbeddings; - this.colors = colorEmbeddings; - - // Build lookup maps - this.categoryMap = new Map(this.categories.map(c => [c.id, c])); - this.themeMap = new Map(this.themes.map(t => [t.id, t])); - this.colorMap = new Map(this.colors.map(c => [c.id, c])); + if (cached) { + this.logger.info('[TaxonomyEmbeddings] Taxonomy changed since cache was built, regenerating...'); + } else { + this.logger.info('[TaxonomyEmbeddings] No cache — fetching taxonomy and generating embeddings...'); + } + await this._buildAndEmbed(rawRows, freshHash); this.initialized = true; this.logger.info('[TaxonomyEmbeddings] Initialization complete'); - return { - categories: this.categories.length, - themes: this.themes.length, - colors: this.colors.length - }; + return { categories: this.categories.length, themes: this.themes.length, colors: this.colors.length }; } catch (error) { this.logger.error('[TaxonomyEmbeddings] Initialization failed:', error); throw error; @@ -92,6 +105,47 @@ class TaxonomyEmbeddings { } } + /** + * Start a background interval that checks for taxonomy changes and regenerates + * embeddings automatically if the content hash differs. + * + * @param {Function} getConnectionFn - async function returning { connection } + * @param {number} intervalMs - check interval, default 1 hour + */ + startBackgroundCheck(getConnectionFn, intervalMs = 60 * 60 * 1000) { + if (this._checkInterval) return; + + this.logger.info(`[TaxonomyEmbeddings] Background taxonomy check started (every ${intervalMs / 60000} min)`); + + this._checkInterval = setInterval(async () => { + if (this._regenerating) return; + + try { + const { connection } = await getConnectionFn(); + const rawRows = await this._fetchRawRows(connection); + const freshHash = this._computeContentHash(rawRows); + + if (freshHash === this.contentHash) return; + + this.logger.info('[TaxonomyEmbeddings] Taxonomy changed, regenerating embeddings in background...'); + this._regenerating = true; + await this._buildAndEmbed(rawRows, freshHash); + this.logger.info('[TaxonomyEmbeddings] Background regeneration complete'); + } catch (err) { + this.logger.warn('[TaxonomyEmbeddings] Background taxonomy check failed:', err.message); + } finally { + this._regenerating = false; + } + }, intervalMs); + } + + stopBackgroundCheck() { + if (this._checkInterval) { + clearInterval(this._checkInterval); + this._checkInterval = null; + } + } + /** * Find similar categories for a product embedding */ @@ -176,29 +230,74 @@ class TaxonomyEmbeddings { // Private Methods // ============================================================================ - async _fetchCategories(connection) { - // Fetch hierarchical categories (types 10-13) - const [rows] = await connection.query(` - SELECT cat_id, name, master_cat_id, type - FROM product_categories - WHERE type IN (10, 11, 12, 13) - ORDER BY type, name - `); + /** + * Fetch minimal raw rows from MySQL — used for content hash computation. + * This is the cheap path: no path-building, no embeddings, just the raw data. + */ + async _fetchRawRows(connection) { + const [[catRows], [themeRows], [colorRows]] = await Promise.all([ + connection.query('SELECT cat_id, name, master_cat_id, type FROM product_categories WHERE type IN (10, 11, 12, 13) ORDER BY cat_id'), + connection.query('SELECT cat_id, name, master_cat_id, type FROM product_categories WHERE type IN (20, 21) ORDER BY cat_id'), + connection.query('SELECT color, name, hex_color FROM product_color_list ORDER BY `order`') + ]); + return { catRows, themeRows, colorRows }; + } - // Build lookup for hierarchy + /** + * Compute a stable SHA-256 hash of the taxonomy row content. + * Any change to IDs, names, or parent relationships will produce a different hash. + */ + _computeContentHash({ catRows, themeRows, colorRows }) { + const content = JSON.stringify({ + cats: catRows.map(r => [r.cat_id, r.name, r.master_cat_id]).sort((a, b) => a[0] - b[0]), + themes: themeRows.map(r => [r.cat_id, r.name, r.master_cat_id]).sort((a, b) => a[0] - b[0]), + colors: colorRows.map(r => [r.color, r.name]).sort() + }); + return crypto.createHash('sha256').update(content).digest('hex').slice(0, 16); + } + + /** + * Build full taxonomy objects and generate embeddings, then atomically swap + * the in-memory state. Called on cache miss and on background change detection. + */ + async _buildAndEmbed(rawRows, contentHash) { + const { catRows, themeRows, colorRows } = rawRows; + + const categories = this._buildCategories(catRows); + const themes = this._buildThemes(themeRows); + const colors = this._buildColors(colorRows); + + this.logger.info(`[TaxonomyEmbeddings] Generating embeddings for ${categories.length} categories, ${themes.length} themes, ${colors.length} colors`); + + const [catEmbeddings, themeEmbeddings, colorEmbeddings] = await Promise.all([ + this._generateEmbeddings(categories, 'categories'), + this._generateEmbeddings(themes, 'themes'), + this._generateEmbeddings(colors, 'colors') + ]); + + // Atomic in-memory swap (single-threaded JS — readers always see a consistent state) + this.categories = catEmbeddings; + this.themes = themeEmbeddings; + this.colors = colorEmbeddings; + this.categoryMap = new Map(this.categories.map(c => [c.id, c])); + this.themeMap = new Map(this.themes.map(t => [t.id, t])); + this.colorMap = new Map(this.colors.map(c => [c.id, c])); + this.contentHash = contentHash; + + this._saveCache(); + } + + _buildCategories(rows) { const byId = new Map(rows.map(r => [r.cat_id, r])); - - // Find IDs of excluded top-level categories and all their descendants const excludedIds = new Set(); - // First pass: find excluded top-level categories for (const row of rows) { if (row.type === 10 && EXCLUDED_CATEGORY_NAMES.includes(row.name.toLowerCase())) { excludedIds.add(row.cat_id); } } - // Multiple passes to find all descendants + // Multiple passes to find all descendants of excluded categories let foundNew = true; while (foundNew) { foundNew = false; @@ -212,20 +311,14 @@ class TaxonomyEmbeddings { this.logger.info(`[TaxonomyEmbeddings] Excluding ${excludedIds.size} categories (Black Friday, Gifts, Deals and children)`); - // Build category objects with full paths, excluding filtered ones const categories = []; - for (const row of rows) { - if (excludedIds.has(row.cat_id)) { - continue; - } + if (excludedIds.has(row.cat_id)) continue; - const path = []; + const pathParts = []; let current = row; - - // Walk up the tree to build full path while (current) { - path.unshift(current.name); + pathParts.unshift(current.name); current = current.master_cat_id ? byId.get(current.master_cat_id) : null; } @@ -234,55 +327,37 @@ class TaxonomyEmbeddings { name: row.name, parentId: row.master_cat_id, type: row.type, - fullPath: path.join(' > '), - embeddingText: path.join(' ') + fullPath: pathParts.join(' > '), + embeddingText: pathParts.join(' ') }); } return categories; } - async _fetchThemes(connection) { - // Fetch themes (types 20-21) - const [rows] = await connection.query(` - SELECT cat_id, name, master_cat_id, type - FROM product_categories - WHERE type IN (20, 21) - ORDER BY type, name - `); - + _buildThemes(rows) { const byId = new Map(rows.map(r => [r.cat_id, r])); - const themes = []; - for (const row of rows) { - const path = []; + return rows.map(row => { + const pathParts = []; let current = row; - while (current) { - path.unshift(current.name); + pathParts.unshift(current.name); current = current.master_cat_id ? byId.get(current.master_cat_id) : null; } - themes.push({ + return { id: row.cat_id, name: row.name, parentId: row.master_cat_id, type: row.type, - fullPath: path.join(' > '), - embeddingText: path.join(' ') - }); - } - - return themes; + fullPath: pathParts.join(' > '), + embeddingText: pathParts.join(' ') + }; + }); } - async _fetchColors(connection) { - const [rows] = await connection.query(` - SELECT color, name, hex_color - FROM product_color_list - ORDER BY \`order\` - `); - + _buildColors(rows) { return rows.map(row => ({ id: row.color, name: row.name, @@ -301,9 +376,7 @@ class TaxonomyEmbeddings { const results = [...items]; // Process in batches - let batchNum = 0; for await (const chunk of this.provider.embedBatchChunked(texts, { batchSize: 100 })) { - batchNum++; for (let i = 0; i < chunk.embeddings.length; i++) { const globalIndex = chunk.startIndex + i; results[globalIndex] = { @@ -318,6 +391,43 @@ class TaxonomyEmbeddings { return results; } + + // ============================================================================ + // Disk Cache Methods + // ============================================================================ + + _loadCache() { + try { + if (!fs.existsSync(CACHE_PATH)) return null; + + const data = JSON.parse(fs.readFileSync(CACHE_PATH, 'utf8')); + if (!data.contentHash || !data.categories?.length || !data.themes?.length || !data.colors?.length) { + this.logger.warn('[TaxonomyEmbeddings] Disk cache malformed or missing content hash, will regenerate'); + return null; + } + + return data; + } catch (err) { + this.logger.warn('[TaxonomyEmbeddings] Failed to load disk cache:', err.message); + return null; + } + } + + _saveCache() { + try { + fs.mkdirSync(path.dirname(CACHE_PATH), { recursive: true }); + fs.writeFileSync(CACHE_PATH, JSON.stringify({ + generatedAt: new Date().toISOString(), + contentHash: this.contentHash, + categories: this.categories, + themes: this.themes, + colors: this.colors, + })); + this.logger.info(`[TaxonomyEmbeddings] Disk cache saved to ${CACHE_PATH}`); + } catch (err) { + this.logger.warn('[TaxonomyEmbeddings] Failed to save disk cache:', err.message); + } + } } module.exports = { TaxonomyEmbeddings }; diff --git a/inventory-server/src/services/ai/index.js b/inventory-server/src/services/ai/index.js index cc18d01..9f4538e 100644 --- a/inventory-server/src/services/ai/index.js +++ b/inventory-server/src/services/ai/index.js @@ -124,6 +124,17 @@ function isReady() { return initialized && taxonomyEmbeddings?.isReady(); } +/** + * Start background taxonomy change detection. + * Call once after initialization, passing a function that returns { connection }. + * @param {Function} getConnectionFn + * @param {number} [intervalMs] - default 1 hour + */ +function startBackgroundCheck(getConnectionFn, intervalMs) { + if (!initialized || !taxonomyEmbeddings) return; + taxonomyEmbeddings.startBackgroundCheck(getConnectionFn, intervalMs); +} + /** * Build weighted product text for embedding. * Weights the product name heavily by repeating it, and truncates long descriptions @@ -362,6 +373,7 @@ module.exports = { initialize, isReady, getStatus, + startBackgroundCheck, // Embeddings (OpenAI) getProductEmbedding, diff --git a/inventory/src/components/product-editor/EditableMultiSelect.tsx b/inventory/src/components/product-editor/EditableMultiSelect.tsx index 50233e1..3961ccb 100644 --- a/inventory/src/components/product-editor/EditableMultiSelect.tsx +++ b/inventory/src/components/product-editor/EditableMultiSelect.tsx @@ -1,7 +1,8 @@ -import { useState, useMemo, useCallback } from "react"; -import { Check, ChevronsUpDown } from "lucide-react"; +import { useState, useMemo, useCallback, useLayoutEffect, useRef } from "react"; +import { Check, ChevronsUpDown, Sparkles, Loader2 } from "lucide-react"; import { Button } from "@/components/ui/button"; -import { Badge } from "@/components/ui/badge"; +import { badgeVariants } from "@/components/ui/badge"; +import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { Command, CommandEmpty, @@ -17,6 +18,7 @@ import { } from "@/components/ui/popover"; import { cn } from "@/lib/utils"; import type { FieldOption } from "./types"; +import type { TaxonomySuggestion } from "@/components/product-import/steps/ValidationStep/store/types"; interface ColorOption extends FieldOption { hex?: string; @@ -34,6 +36,39 @@ function isWhite(hex: string) { return /^#?f{3,6}$/i.test(hex); } +function TruncatedBadge({ label, hex }: { label: string; hex?: string }) { + const textRef = useRef(null); + const [isTruncated, setIsTruncated] = useState(false); + + useLayoutEffect(() => { + const el = textRef.current; + if (el) setIsTruncated(el.scrollWidth > el.clientWidth); + }, [label]); + + return ( + + + + {hex && ( + + )} + + {label} + + + + {label} + + ); +} + export function EditableMultiSelect({ options, value, @@ -42,6 +77,9 @@ export function EditableMultiSelect({ placeholder, searchPlaceholder, showColors, + suggestions, + isLoadingSuggestions, + onOpen, }: { options: FieldOption[]; value: string[]; @@ -50,9 +88,17 @@ export function EditableMultiSelect({ placeholder?: string; searchPlaceholder?: string; showColors?: boolean; + suggestions?: TaxonomySuggestion[]; + isLoadingSuggestions?: boolean; + onOpen?: () => void; }) { const [open, setOpen] = useState(false); + const handleOpenChange = useCallback((isOpen: boolean) => { + setOpen(isOpen); + if (isOpen) onOpen?.(); + }, [onOpen]); + const selectedLabels = useMemo(() => { return value.map((v) => { const opt = options.find((o) => String(o.value) === String(v)); @@ -82,7 +128,7 @@ export function EditableMultiSelect({ return (