/**
 * AI Service
 *
 * Main entry point for AI functionality including:
 * - Embeddings for taxonomy suggestions (OpenAI)
 * - Chat completions for validation tasks (Groq)
 * - Task registry for AI operations
 */

// Provider clients: OpenAI for embeddings, Groq for chat completions.
const { OpenAIProvider } = require('./providers/openaiProvider');
const { GroqProvider, MODELS: GROQ_MODELS } = require('./providers/groqProvider');
// Taxonomy embedding store and vector-similarity helpers (re-exported below).
const { TaxonomyEmbeddings } = require('./embeddings/taxonomyEmbeddings');
const { cosineSimilarity, findTopMatches } = require('./embeddings/similarity');
// Task registry: task IDs, registry lookup, and bulk task registration.
const { getRegistry, TASK_IDS, registerAllTasks } = require('./tasks');

// Module-level singleton state for the AI service (one instance per process).
let initialized = false;       // true once initialize() has completed successfully
let initializing = false;      // guards against concurrent initialize() calls
let openaiProvider = null;     // OpenAIProvider instance (embeddings), set in initialize()
let groqProvider = null;       // GroqProvider instance (chat); stays null without a Groq API key
let taxonomyEmbeddings = null; // TaxonomyEmbeddings instance built during initialize()
let logger = console;          // replaced by a custom logger when one is passed to initialize()

// Store pool reference for task access
let appPool = null;

/**
 * Initialize the AI service. Idempotent: a second call after success is a
 * no-op, and concurrent callers wait for the in-flight initialization.
 * @param {Object} options
 * @param {string} options.openaiApiKey - OpenAI API key (for embeddings); required
 * @param {string} [options.groqApiKey] - Groq API key (for chat completions)
 * @param {Object} options.mysqlConnection - MySQL connection for taxonomy data
 * @param {Object} [options.pool] - PostgreSQL pool for prompt loading
 * @param {Object} [options.logger] - Logger instance
 * @returns {Promise<{success: boolean, message: string, stats?: Object, groqEnabled?: boolean}>}
 *   Failures are reported via `success: false` rather than thrown, so callers
 *   can degrade gracefully.
 */
async function initialize({ openaiApiKey, groqApiKey, mysqlConnection, pool, logger: customLogger }) {
  // Fast path: already fully initialized.
  if (initialized) {
    return { success: true, message: 'Already initialized' };
  }

  if (initializing) {
    // Wait for existing initialization: poll until the in-flight call clears
    // the flag in its `finally` block, then report that call's outcome.
    while (initializing) {
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    return { success: initialized, message: initialized ? 'Initialized' : 'Initialization failed' };
  }

  initializing = true;

  try {
    if (customLogger) {
      logger = customLogger;
    }

    // Embeddings are the core capability, so the OpenAI key is mandatory.
    if (!openaiApiKey) {
      throw new Error('OpenAI API key is required');
    }

    logger.info('[AI] Initializing AI service...');

    // Store pool reference for tasks
    if (pool) {
      appPool = pool;
    }

    // Create OpenAI provider (for embeddings)
    openaiProvider = new OpenAIProvider({ apiKey: openaiApiKey });

    // Create Groq provider (for chat completions) if API key provided;
    // without it, chat-based tasks are simply unavailable (see runTask()).
    if (groqApiKey) {
      groqProvider = new GroqProvider({ apiKey: groqApiKey });
      logger.info('[AI] Groq provider initialized for chat completions');
    } else {
      logger.warn('[AI] No Groq API key provided - chat completion tasks will not be available');
    }

    // Create and initialize taxonomy embeddings
    taxonomyEmbeddings = new TaxonomyEmbeddings({
      provider: openaiProvider,
      logger
    });

    const stats = await taxonomyEmbeddings.initialize(mysqlConnection);

    // Register validation tasks if Groq is available
    if (groqProvider) {
      registerValidationTasks();
    }

    initialized = true;
    logger.info('[AI] AI service initialized', {
      ...stats,
      groqEnabled: !!groqProvider,
      tasksRegistered: getRegistry().list()
    });

    return {
      success: true,
      message: 'Initialized',
      stats,
      groqEnabled: !!groqProvider
    };
  } catch (error) {
    // Report the failure instead of rethrowing so the app can start degraded.
    logger.error('[AI] Initialization failed:', error);
    return { success: false, message: error.message };
  } finally {
    // Always clear the guard so waiting callers (and retries) can proceed.
    initializing = false;
  }
}

/**
 * Register validation tasks with the task registry.
 * Called during initialization if Groq is available; delegates the actual
 * registration to registerAllTasks() from ./tasks.
 */
function registerValidationTasks() {
  registerAllTasks(logger);
  logger.info('[AI] Validation tasks registered');
}

/**
 * Check if the service is ready to serve requests.
 * @returns {boolean} true only when initialization completed AND the
 *   taxonomy embeddings report themselves ready.
 */
function isReady() {
  // Coerce to a strict boolean: without Boolean(), a null taxonomyEmbeddings
  // makes the expression evaluate to `undefined` instead of `false`.
  return Boolean(initialized && taxonomyEmbeddings?.isReady());
}

/**
 * Build weighted product text for embedding.
 * The product name is weighted heavily by repeating it three times, and long
 * descriptions are truncated so verbose marketing copy cannot drown out the
 * product signal.
 *
 * @param {Object} product - Product with name, description, company, line
 * @returns {string} - Combined text for embedding ('' when nothing usable)
 */
function buildProductText(product) {
  const clean = (value) => value?.trim();

  const name = clean(product.name);
  const description = clean(product.description);
  const company = clean(product.company_name || product.company);
  const line = clean(product.line_name || product.line);

  const MAX_DESCRIPTION_LENGTH = 500;
  const segments = [];

  // Name is most important - repeat 3x to weight it heavily in the embedding.
  if (name) {
    segments.push(name, name, name);
  }

  // Company and line provide context.
  if (company) {
    segments.push(company);
  }
  if (line) {
    segments.push(line);
  }

  // Truncate description to prevent it from overwhelming the signal.
  if (description) {
    const fits = description.length <= MAX_DESCRIPTION_LENGTH;
    segments.push(fits ? description : `${description.slice(0, MAX_DESCRIPTION_LENGTH)}...`);
  }

  return segments.join(' ').trim();
}
/**
 * Generate an embedding vector for a single product.
 * @param {Object} product - Product with name, description, company, line
 * @returns {Promise<{embedding: number[]|null, latencyMs: number}>}
 *   `embedding` is null when the product yields no embeddable text.
 * @throws {Error} if the service has not been initialized
 */
async function getProductEmbedding(product) {
  if (!initialized || !openaiProvider) {
    throw new Error('AI service not initialized');
  }

  const text = buildProductText(product);
  if (!text) {
    // Nothing usable to embed — skip the API call entirely.
    return { embedding: null, latencyMs: 0 };
  }

  const { embeddings, latencyMs } = await openaiProvider.embed(text);
  return { embedding: embeddings[0], latencyMs };
}

/**
 * Generate embeddings for multiple products in one batched API call.
 * Products that produce no embeddable text are skipped; results carry the
 * original index of each product so callers can realign them.
 * @param {Object[]} products - Array of products
 * @returns {Promise<{embeddings: Array<{index: number, embedding: number[]}>, latencyMs: number}>}
 * @throws {Error} if the service has not been initialized
 */
async function getProductEmbeddings(products) {
  if (!initialized || !openaiProvider) {
    throw new Error('AI service not initialized');
  }

  const texts = products.map(buildProductText);

  // Remember the positions of products whose text is non-empty.
  const validIndices = [];
  for (let i = 0; i < texts.length; i += 1) {
    if (texts[i]) {
      validIndices.push(i);
    }
  }

  if (validIndices.length === 0) {
    return { embeddings: [], latencyMs: 0 };
  }

  const result = await openaiProvider.embed(validIndices.map((i) => texts[i]));

  // Pair each returned embedding with the original product index.
  const embeddings = validIndices.map((originalIndex, k) => ({
    index: originalIndex,
    embedding: result.embeddings[k]
  }));

  return { embeddings, latencyMs: result.latencyMs };
}

/**
 * Find similar taxonomy items for a product embedding.
 * @param {number[]} productEmbedding
 * @param {Object} options
 * @param {number} [options.topCategories=10] - max category matches
 * @param {number} [options.topThemes=5] - max theme matches
 * @param {number} [options.topColors=5] - max color matches
 * @returns {{categories: Array, themes: Array, colors: Array}}
 * @throws {Error} if the service has not been initialized
 */
function findSimilarTaxonomy(productEmbedding, options = {}) {
  if (!initialized || !taxonomyEmbeddings) {
    throw new Error('AI service not initialized');
  }

  // `??` (not `||`) so an explicit 0 from the caller is respected.
  return {
    categories: taxonomyEmbeddings.findSimilarCategories(
      productEmbedding,
      options.topCategories ?? 10
    ),
    themes: taxonomyEmbeddings.findSimilarThemes(productEmbedding, options.topThemes ?? 5),
    colors: taxonomyEmbeddings.findSimilarColors(productEmbedding, options.topColors ?? 5)
  };
}

/**
 * Get a product's embedding and find similar taxonomy items in one call.
 * @param {Object} product - Product with name, description, company, line
 * @param {Object} [options] - forwarded to findSimilarTaxonomy (top-N limits)
 * @returns {Promise<Object>} categories/themes/colors plus timing breakdown
 *   (latencyMs = embedding + search; also embeddingLatencyMs, searchLatencyMs)
 */
async function getSuggestionsForProduct(product, options = {}) {
  const embeddingResult = await getProductEmbedding(product);
  const embeddingLatency = embeddingResult.latencyMs;

  // A product with no embeddable text yields empty suggestions.
  if (!embeddingResult.embedding) {
    return {
      categories: [],
      themes: [],
      colors: [],
      latencyMs: embeddingLatency
    };
  }

  const searchStart = Date.now();
  const matches = findSimilarTaxonomy(embeddingResult.embedding, options);
  const searchLatency = Date.now() - searchStart;

  return {
    ...matches,
    latencyMs: embeddingLatency + searchLatency,
    embeddingLatencyMs: embeddingLatency,
    searchLatencyMs: searchLatency
  };
}

/**
 * Get all taxonomy data (without embeddings) for the frontend.
 * @returns {Object} whatever TaxonomyEmbeddings.getTaxonomyData() provides
 *   (presumably categories/themes/colors — shape owned by that class)
 * @throws {Error} if the service has not been initialized
 */
function getTaxonomyData() {
  if (!initialized || !taxonomyEmbeddings) {
    throw new Error('AI service not initialized');
  }

  return taxonomyEmbeddings.getTaxonomyData();
}

/**
 * Get a snapshot of the service's status for diagnostics.
 * Safe to call before initialization — never throws.
 * @returns {Object} flags, taxonomy counts (or null), and registered tasks
 */
function getStatus() {
  const registry = getRegistry();

  // Taxonomy counts are only meaningful once the embeddings object exists.
  const taxonomyStats = taxonomyEmbeddings
    ? {
        categories: taxonomyEmbeddings.categories?.length || 0,
        themes: taxonomyEmbeddings.themes?.length || 0,
        colors: taxonomyEmbeddings.colors?.length || 0
      }
    : null;

  return {
    initialized,
    ready: isReady(),
    hasOpenAI: !!openaiProvider,
    hasGroq: !!groqProvider,
    hasTaxonomy: !!taxonomyEmbeddings,
    taxonomyStats,
    tasks: {
      registered: registry.list(),
      count: registry.size()
    }
  };
}

/**
 * Run an AI task by ID via the task registry.
 * @param {string} taskId - Task identifier from TASK_IDS
 * @param {Object} [payload] - Task-specific input
 * @returns {Promise<Object>} Task result
 * @throws {Error} if the service is uninitialized or Groq is unavailable
 */
async function runTask(taskId, payload = {}) {
  if (!initialized) {
    throw new Error('AI service not initialized');
  }

  // Chat-completion tasks all go through Groq; without it they cannot run.
  if (!groqProvider) {
    throw new Error('Groq provider not available - chat completion tasks require GROQ_API_KEY');
  }

  // Merge the caller's payload with injected dependencies the tasks may need.
  // Use pool from payload if provided (from route), fall back to stored appPool.
  const taskInput = {
    ...payload,
    provider: groqProvider,
    pool: payload.pool || appPool,
    logger
  };

  return getRegistry().runTask(taskId, taskInput);
}

/**
 * Get the Groq provider instance (for direct use if needed).
 * Null when initialize() was called without a Groq API key.
 * @returns {GroqProvider|null}
 */
function getGroqProvider() {
  return groqProvider;
}

/**
 * Get the PostgreSQL pool (for tasks that need DB access).
 * Null unless a pool was supplied to initialize().
 * @returns {Object|null}
 */
function getPool() {
  return appPool;
}

/**
 * Check if chat completion tasks are available.
 * True only when a Groq provider was created during initialize().
 * @returns {boolean}
 */
function hasChatCompletion() {
  return !!groqProvider;
}

// Public API of the AI service module.
module.exports = {
  // Initialization
  initialize,
  isReady,
  getStatus,

  // Embeddings (OpenAI)
  getProductEmbedding,
  getProductEmbeddings,
  findSimilarTaxonomy,
  getSuggestionsForProduct,
  getTaxonomyData,

  // Chat completions (Groq)
  runTask,
  hasChatCompletion,
  getGroqProvider,
  getPool,

  // Constants
  TASK_IDS,
  GROQ_MODELS,

  // Re-export utilities
  cosineSimilarity,
  findTopMatches
};