Add Groq as AI provider + new inline AI tasks, extend database to support more prompt types
This commit is contained in:
178
inventory-server/src/services/ai/providers/groqProvider.js
Normal file
178
inventory-server/src/services/ai/providers/groqProvider.js
Normal file
@@ -0,0 +1,178 @@
|
||||
/**
|
||||
* Groq Provider - Handles chat completions via Groq's OpenAI-compatible API
|
||||
*
|
||||
* Uses Groq's fast inference for real-time AI validation tasks.
|
||||
* Supports models like openai/gpt-oss-120b (complex) and openai/gpt-oss-20b (simple).
|
||||
*/
|
||||
|
||||
// Root URL of Groq's OpenAI-compatible REST API.
const GROQ_BASE_URL = 'https://api.groq.com/openai/v1';

// Model identifiers offered by this provider, keyed by relative size.
const MODELS = {
  // For complex tasks (descriptions, sanity checks)
  LARGE: 'openai/gpt-oss-120b',
  // For simple tasks (name validation)
  SMALL: 'openai/gpt-oss-20b',
};
|
||||
|
||||
class GroqProvider {
  /**
   * Create a provider bound to one API key and base URL.
   *
   * @param {Object} options
   * @param {string} options.apiKey - Groq API key (required)
   * @param {string} [options.baseUrl] - Override base URL (defaults to GROQ_BASE_URL)
   * @param {number} [options.timeoutMs=30000] - Default per-request timeout in milliseconds
   * @throws {Error} If `apiKey` is missing or empty
   */
  constructor({ apiKey, baseUrl = GROQ_BASE_URL, timeoutMs = 30000 }) {
    if (!apiKey) {
      throw new Error('Groq API key is required');
    }
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.timeoutMs = timeoutMs;
  }

  /**
   * Send a chat completion request and normalise the response.
   *
   * When `responseFormat` is supplied, the returned `parsed` field holds the
   * decoded JSON (or `null` if no JSON could be recovered from the content).
   *
   * @param {Object} params
   * @param {Array<{role: string, content: string}>} params.messages - Conversation messages
   * @param {string} [params.model] - Model to use (defaults to MODELS.LARGE)
   * @param {number} [params.temperature=0.3] - Response randomness (0-2)
   * @param {number} [params.maxTokens=500] - Max tokens in response (sent as `max_completion_tokens`)
   * @param {Object} [params.responseFormat] - For JSON mode: { type: 'json_object' }
   * @param {number} [params.timeoutMs] - Request timeout override (defaults to the instance timeout)
   * @returns {Promise<{content: string, parsed: Object|null, usage: Object, latencyMs: number, model: string}>}
   * @throws {Error} With `code === 'TIMEOUT'` on timeout, or with `status`/`code`
   *   copied from the API error payload on a non-2xx response
   */
  async chatCompletion({
    messages,
    model = MODELS.LARGE,
    temperature = 0.3,
    maxTokens = 500,
    responseFormat = null,
    timeoutMs = this.timeoutMs
  }) {
    const started = Date.now();

    const body = {
      model,
      messages,
      temperature,
      max_completion_tokens: maxTokens
    };

    // Enable JSON mode if requested
    if (responseFormat?.type === 'json_object') {
      body.response_format = { type: 'json_object' };
    }

    const response = await this._makeRequest('chat/completions', body, timeoutMs);

    const content = response.choices?.[0]?.message?.content || '';
    const usage = response.usage || {};

    // Attempt to parse JSON if a response format was requested
    let parsed = null;
    if (responseFormat && content) {
      try {
        parsed = JSON.parse(content);
      } catch {
        // Content isn't bare JSON - try to recover it from markdown / surrounding text
        parsed = this._extractJson(content);
      }
    }

    return {
      content,
      parsed,
      usage: {
        promptTokens: usage.prompt_tokens || 0,
        completionTokens: usage.completion_tokens || 0,
        totalTokens: usage.total_tokens || 0
      },
      latencyMs: Date.now() - started,
      // Fall back to the requested model when the response does not name one
      model: response.model || model
    };
  }

  /**
   * Extract JSON from content that might be wrapped in markdown code blocks
   * or surrounded by other text.
   *
   * @private
   * @param {string} content - Raw model output
   * @returns {Object|Array|null} Decoded JSON, or null when nothing parseable is found
   */
  _extractJson(content) {
    // First preference: the body of the first fenced code block
    const codeBlockMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/);
    if (codeBlockMatch) {
      try {
        return JSON.parse(codeBlockMatch[1].trim());
      } catch {
        // Fall through to the looser match below
      }
    }

    // Second preference: the widest {...} or [...] span in the text
    const jsonMatch = content.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
    if (jsonMatch) {
      try {
        return JSON.parse(jsonMatch[1]);
      } catch {
        // Fall through
      }
    }

    return null;
  }

  /**
   * Make an HTTP POST request to the Groq API and return the decoded JSON body.
   *
   * @private
   * @param {string} endpoint - Path relative to the base URL (e.g. 'chat/completions')
   * @param {Object} body - JSON-serialisable request payload
   * @param {number} timeoutMs - Abort the request after this many milliseconds
   * @returns {Promise<Object>} Decoded response body
   * @throws {Error} `code === 'TIMEOUT'` when aborted by the timer; otherwise an
   *   error carrying `status`, `code` and (if present) `failedGeneration`
   */
  async _makeRequest(endpoint, body, timeoutMs) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), timeoutMs);

    // Tolerate a configured base URL that ends in '/' so the path has no '//'
    const url = `${String(this.baseUrl).replace(/\/+$/, '')}/${endpoint}`;

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`
        },
        body: JSON.stringify(body),
        signal: controller.signal
      });

      if (!response.ok) {
        // The error body may be missing, non-JSON, or the JSON literal `null`
        const payload = await response.json().catch(() => ({}));
        const apiError = payload?.error;
        const message = apiError?.message || `Groq API error: ${response.status}`;
        const err = new Error(message);
        err.status = response.status;
        err.code = apiError?.code;
        // Include failed_generation if available (for JSON mode failures)
        if (apiError?.failed_generation) {
          err.failedGeneration = apiError.failed_generation;
          console.error('[Groq] JSON validation failed. Model output:', apiError.failed_generation);
        }
        throw err;
      }

      // `await` here is deliberate: it keeps the body read inside the
      // try/finally so the abort timer still covers it and an abort during
      // streaming is translated to a TIMEOUT error below.
      return await response.json();
    } catch (error) {
      if (error?.name === 'AbortError') {
        const err = new Error(`Groq request timed out after ${timeoutMs}ms`, { cause: error });
        err.code = 'TIMEOUT';
        throw err;
      }
      throw error;
    } finally {
      clearTimeout(timeout);
    }
  }

  /**
   * Check if the provider is properly configured
   * @returns {boolean} True when an API key is set
   */
  isConfigured() {
    return !!this.apiKey;
  }
}
|
||||
|
||||
module.exports = { GroqProvider, MODELS, GROQ_BASE_URL };
|
||||
Reference in New Issue
Block a user