83 lines
2.0 KiB
JavaScript
83 lines
2.0 KiB
JavaScript
/**
 * Vector similarity utilities
 */
|
|
|
|
/**
 * Compute the cosine similarity between two equal-length numeric vectors.
 *
 * Unusable input is reported as "no similarity" (0) rather than by throwing:
 * a missing vector, mismatched lengths, a zero-magnitude vector, or a
 * computation that produces NaN (for example a non-numeric element).
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector; must have the same length as `a`.
 * @returns {number} Similarity score in the closed range [-1, 1], or 0 when
 *   the inputs cannot be compared.
 */
function cosineSimilarity(a, b) {
  if (!a || !b || a.length !== b.length) {
    return 0;
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) {
    return 0;
  }

  const similarity = dotProduct / denominator;

  // NaN would poison any comparator that sorts by this score, so treat it
  // like every other unusable input.
  if (Number.isNaN(similarity)) {
    return 0;
  }

  // Floating-point rounding can push the ratio marginally past +/-1; clamp so
  // the documented range always holds.
  return Math.max(-1, Math.min(1, similarity));
}
|
|
|
|
/**
 * Find the K items most similar to a query embedding.
 *
 * Every item is scored with `cosineSimilarity` and the scores are returned in
 * descending order.
 *
 * @param {number[]} queryEmbedding - The embedding to search for.
 * @param {Array<{id: any, embedding: number[]}>} items - Items with embeddings.
 * @param {number} [topK=10] - Maximum number of results to return. Fractional
 *   values are truncated; zero, negative or NaN values yield no results.
 * @returns {Array<{id: any, similarity: number}>} At most `topK` matches,
 *   best match first. Empty when the query is missing or `items` is not a
 *   non-empty array.
 */
function findTopMatches(queryEmbedding, items, topK = 10) {
  if (!queryEmbedding || !Array.isArray(items) || items.length === 0) {
    return [];
  }

  // Normalise the limit up front: a negative count handed straight to
  // `slice(0, topK)` is read as an offset from the END of the array and would
  // return almost every item instead of none.
  const limit = Math.trunc(topK);
  if (!(limit > 0)) {
    return [];
  }

  const scored = items.map((item) => ({
    id: item.id,
    similarity: cosineSimilarity(queryEmbedding, item.embedding),
  }));

  scored.sort((a, b) => b.similarity - a.similarity);

  return scored.slice(0, limit);
}
|
|
|
|
/**
 * Find every item whose similarity to the query meets a minimum threshold.
 *
 * Every item is scored with `cosineSimilarity`; items scoring at or above
 * `threshold` are returned in descending similarity order.
 *
 * @param {number[]} queryEmbedding - The embedding to search for.
 * @param {Array<{id: any, embedding: number[]}>} items - Items with embeddings.
 * @param {number} [threshold=0.5] - Minimum similarity, inclusive. Cosine
 *   scores range from -1 to 1, so values in that range are meaningful.
 * @returns {Array<{id: any, similarity: number}>} Matching items, best match
 *   first. Empty when the query is missing or `items` is not a non-empty
 *   array.
 */
function findMatchesAboveThreshold(queryEmbedding, items, threshold = 0.5) {
  // Array.isArray keeps a truthy non-array value on the same "return []" path
  // as the other invalid inputs instead of failing on `.map`.
  if (!queryEmbedding || !Array.isArray(items) || items.length === 0) {
    return [];
  }

  const matches = items
    .map((item) => ({
      id: item.id,
      similarity: cosineSimilarity(queryEmbedding, item.embedding),
    }))
    .filter((match) => match.similarity >= threshold);

  matches.sort((a, b) => b.similarity - a.similarity);

  return matches;
}
|
|
|
|
module.exports = {
|
|
cosineSimilarity,
|
|
findTopMatches,
|
|
findMatchesAboveThreshold
|
|
};
|