/** * Vector similarity utilities */ /** * Compute cosine similarity between two vectors * @param {number[]} a * @param {number[]} b * @returns {number} Similarity score between -1 and 1 */ function cosineSimilarity(a, b) { if (!a || !b || a.length !== b.length) { return 0; } let dotProduct = 0; let normA = 0; let normB = 0; for (let i = 0; i < a.length; i++) { dotProduct += a[i] * b[i]; normA += a[i] * a[i]; normB += b[i] * b[i]; } const denominator = Math.sqrt(normA) * Math.sqrt(normB); if (denominator === 0) return 0; return dotProduct / denominator; } /** * Find top K most similar items from a collection * @param {number[]} queryEmbedding - The embedding to search for * @param {Array<{id: any, embedding: number[]}>} items - Items with embeddings * @param {number} topK - Number of results to return * @returns {Array<{id: any, similarity: number}>} */ function findTopMatches(queryEmbedding, items, topK = 10) { if (!queryEmbedding || !items || items.length === 0) { return []; } const scored = items.map(item => ({ id: item.id, similarity: cosineSimilarity(queryEmbedding, item.embedding) })); scored.sort((a, b) => b.similarity - a.similarity); return scored.slice(0, topK); } /** * Find matches above a similarity threshold * @param {number[]} queryEmbedding * @param {Array<{id: any, embedding: number[]}>} items * @param {number} threshold - Minimum similarity (0-1) * @returns {Array<{id: any, similarity: number}>} */ function findMatchesAboveThreshold(queryEmbedding, items, threshold = 0.5) { if (!queryEmbedding || !items || items.length === 0) { return []; } const scored = items .map(item => ({ id: item.id, similarity: cosineSimilarity(queryEmbedding, item.embedding) })) .filter(item => item.similarity >= threshold); scored.sort((a, b) => b.similarity - a.similarity); return scored; } module.exports = { cosineSimilarity, findTopMatches, findMatchesAboveThreshold };