Split off AI prompt into separate file, auto include taxonomy in prompt, create prompt debug page

This commit is contained in:
2025-02-21 11:50:46 -05:00
parent 7f7e6fdd1f
commit cff176e7a3
5 changed files with 475 additions and 28 deletions

View File

@@ -0,0 +1,17 @@
I will provide a JSON array with product data. Process the array by combining all products from the validData and invalidData arrays into a single array, excluding any fields starting with “__”, such as “__index” or “__errors”. Process each product according to the reference guidelines below. If a field is not included in the data, do not include it in your response unless the specific field guidelines below say otherwise.
Respond in the following JSON format:
{
"correctedData": [], // Array of corrected products
"changes": [], // Array of strings describing each change made
"warnings": [] // Array of strings with warnings or suggestions for manual review
}
Using the provided guidelines, focus on:
1. Correcting typos and any incorrect spelling or grammar
2. Standardizing product names
3. Correcting and enhancing descriptions by adding details, keywords, and SEO-friendly language
4. Fixing any obvious errors in measurements, prices, or quantities
5. Adding correct categories, themes, and colors
Use only the provided data and your own knowledge to make changes. Do not make assumptions or make up information that you're not sure about. If you're unable to make a change you're confident about, leave the field as is.

View File

@@ -1,35 +1,253 @@
const express = require('express'); const express = require('express');
const router = express.Router(); const router = express.Router();
const OpenAI = require('openai'); const OpenAI = require('openai');
const fs = require('fs').promises;
const path = require('path');
const openai = new OpenAI({ const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY apiKey: process.env.OPENAI_API_KEY
}); });
// Helper function to create the prompt for product validation // Cache configuration
function createValidationPrompt(products) { const CACHE_TTL = 60 * 60 * 1000; // 1 hour in milliseconds
return `You are a product data validation assistant. Please review the following product data and suggest corrections or improvements. Focus on:
1. Standardizing product names and descriptions
2. Fixing any obvious errors in measurements, prices, or quantities
3. Ensuring consistency in formatting
4. Flagging any suspicious or invalid values
Here is the product data to validate: // Cache structure with TTL
${JSON.stringify(products, null, 2)} let cache = {
taxonomyData: null,
validationPrompt: null,
lastUpdated: null
};
Please respond with: // Function to check if cache is valid
1. The corrected product data in the exact same JSON format function isCacheValid() {
2. A list of changes made and why return cache.lastUpdated && (Date.now() - cache.lastUpdated) < CACHE_TTL;
3. Any warnings or suggestions for manual review
Respond in the following JSON format:
{
"correctedData": [], // Array of corrected products
"changes": [], // Array of changes made
"warnings": [] // Array of warnings or suggestions
}`;
} }
/**
 * Drop everything held in the module-level cache.
 *
 * The cache binding is pointed at a brand-new object whose three slots
 * (taxonomy data, assembled prompt, timestamp) are all null, so the next
 * lookup has to rebuild from scratch.
 */
function clearCache() {
  const blankState = {
    taxonomyData: null,
    validationPrompt: null,
    lastUpdated: null
  };
  cache = blankState;
}
// Debug endpoint: reports cache state, taxonomy counts, and the prompt that
// would be sent for a one-item sample payload. Responds 500 with the error
// message if loading the taxonomy or the prompt fails.
router.get('/debug', async (req, res) => {
  try {
    console.log('Debug endpoint called');
    const { pool } = req.app.locals;

    // Taxonomy is loaded before the prompt.
    console.log('Loading taxonomy data...');
    const taxonomy = await getTaxonomyData(pool);
    console.log('Taxonomy data loaded:', {
      categoriesCount: taxonomy.categories.length,
      themesCount: taxonomy.themes.length,
      colorsCount: taxonomy.colors.length,
      taxCodesCount: taxonomy.taxCodes.length,
      sizeCategoriesCount: taxonomy.sizeCategories.length
    });

    console.log('Loading prompt...');
    const currentPrompt = await loadPrompt(pool);

    // Append a placeholder product so the full prompt shape can be inspected.
    const sampleData = [{ name: "Sample Product" }];
    const fullPrompt = `${currentPrompt}\n${JSON.stringify(sampleData, null, 2)}`;

    // Start from the "nothing cached" answer and fill in details when a
    // timestamp is present.
    const cacheStatus = {
      isCacheValid: isCacheValid(),
      lastUpdated: null,
      timeUntilExpiry: 'expired',
    };
    if (cache.lastUpdated) {
      cacheStatus.lastUpdated = new Date(cache.lastUpdated).toISOString();
      const remainingMs = Math.max(0, CACHE_TTL - (Date.now() - cache.lastUpdated));
      cacheStatus.timeUntilExpiry = remainingMs / 1000 + ' seconds';
    }

    let taxonomyStats = null;
    if (taxonomy) {
      taxonomyStats = {
        // Categories are nested, so every node in the tree is counted.
        categories: countItems(taxonomy.categories),
        themes: taxonomy.themes.length,
        colors: taxonomy.colors.length,
        taxCodes: taxonomy.taxCodes.length,
        sizeCategories: taxonomy.sizeCategories.length
      };
    }

    const response = {
      cacheStatus,
      taxonomyStats,
      basePrompt: currentPrompt,
      sampleFullPrompt: fullPrompt,
      promptLength: fullPrompt.length,
    };

    console.log('Sending response with stats:', response.taxonomyStats);
    res.json(response);
  } catch (err) {
    console.error('Debug endpoint error:', err);
    res.status(500).json({ error: err.message });
  }
});
/**
 * Count every node in a nested list, descending into `subcategories`.
 *
 * @param {Array<{subcategories?: Array}>} items - Nodes at the current level.
 * @returns {number} Number of nodes at this level plus all nested levels.
 */
function countItems(items) {
  let total = 0;
  for (const node of items) {
    total += 1;
    if (node.subcategories) {
      total += countItems(node.subcategories);
    }
  }
  return total;
}
// Force cache refresh endpoint: empties the cache, rebuilds it by loading the
// prompt, and reports the new cache timestamp. Responds 500 with the error
// message if the rebuild fails.
router.post('/refresh-cache', async (req, res) => {
  try {
    clearCache();

    // Loading the prompt repopulates the cache.
    await loadPrompt(req.app.locals.pool);

    const newCacheTime = new Date(cache.lastUpdated).toISOString();
    res.json({
      success: true,
      message: 'Cache refreshed successfully',
      newCacheTime
    });
  } catch (err) {
    console.error('Cache refresh error:', err);
    res.status(500).json({ error: err.message });
  }
});
/**
 * Fetch the product taxonomy from the database and shape it for the prompt.
 *
 * Returns the cached copy when one exists and the cache is still valid.
 * Otherwise runs five read-only queries (categories, themes, colors, tax
 * codes, size categories) concurrently, formats the rows, stores the result
 * in the module cache, and stamps `cache.lastUpdated`.
 *
 * @param {{query: function(string): Promise<Array>}} pool - Object whose
 *   `query(sql)` resolves to an array with the result rows at index 0.
 * @returns {Promise<{
 *   categories: Array<{name: string, subcategories?: Array}>,
 *   themes: Array<{name: string, subthemes?: string[]}>,
 *   colors: string[],
 *   taxCodes: Array<{id: *, name: string}>,
 *   sizeCategories: Array<{id: *, name: string}>
 * }>} The formatted taxonomy.
 * @throws Rejects with the first query error encountered.
 */
async function getTaxonomyData(pool) {
  if (cache.taxonomyData && isCacheValid()) {
    return cache.taxonomyData;
  }

  // Categories, up to four levels deep; each level is joined to its ancestors.
  const categoriesSql = `
SELECT cat_id, name, master_cat_id, level_order
FROM (
SELECT cat_id,name,NULL AS master_cat_id,1 AS level_order
FROM product_categories s
WHERE type=10
UNION ALL
SELECT c.cat_id,c.name,c.master_cat_id,2 AS level_order
FROM product_categories c
JOIN product_categories s ON c.master_cat_id=s.cat_id
WHERE c.type=11 AND s.type=10
UNION ALL
SELECT sc.cat_id,sc.name,sc.master_cat_id,3 AS level_order
FROM product_categories sc
JOIN product_categories c ON sc.master_cat_id=c.cat_id
JOIN product_categories s ON c.master_cat_id=s.cat_id
WHERE sc.type=12 AND c.type=11 AND s.type=10
UNION ALL
SELECT ssc.cat_id,ssc.name,ssc.master_cat_id,4 AS level_order
FROM product_categories ssc
JOIN product_categories sc ON ssc.master_cat_id=sc.cat_id
JOIN product_categories c ON sc.master_cat_id=c.cat_id
JOIN product_categories s ON c.master_cat_id=s.cat_id
WHERE ssc.type=13 AND sc.type=12 AND c.type=11 AND s.type=10
) AS hierarchy
ORDER BY level_order,cat_id
`;

  // Themes, two levels deep.
  const themesSql = `
SELECT cat_id, name, master_cat_id, level_order
FROM (
SELECT t.cat_id,t.name,null as master_cat_id,1 AS level_order
FROM product_categories t
WHERE t.type=20
UNION ALL
SELECT ts.cat_id,ts.name,ts.master_cat_id,2 AS level_order
FROM product_categories ts
JOIN product_categories t ON ts.master_cat_id=t.cat_id
WHERE ts.type=21 AND t.type=20
) AS hierarchy
ORDER BY level_order,name
`;

  // The five lookups are independent, so issue them together instead of
  // waiting for each one in turn.
  const [
    [categories],
    [themes],
    [colors],
    [taxCodes],
    [sizeCategories]
  ] = await Promise.all([
    pool.query(categoriesSql),
    pool.query(themesSql),
    pool.query('SELECT color, name FROM product_color_list ORDER BY name'),
    pool.query('SELECT tax_code_id, name FROM product_tax_codes ORDER BY name'),
    pool.query('SELECT cat_id, name FROM product_categories WHERE type=50 ORDER BY name')
  ]);

  // Index category rows by (level, parent id) in a single pass so building the
  // tree does not rescan the full row list for every node. Row order within
  // each group follows the query's ORDER BY.
  const rowsByLevel = new Map();
  for (const row of categories) {
    let rowsByParent = rowsByLevel.get(row.level_order);
    if (!rowsByParent) {
      rowsByParent = new Map();
      rowsByLevel.set(row.level_order, rowsByParent);
    }
    const siblings = rowsByParent.get(row.master_cat_id);
    if (siblings) {
      siblings.push(row);
    } else {
      rowsByParent.set(row.master_cat_id, [row]);
    }
  }

  // Build the nested category structure; `subcategories` is omitted on leaves.
  const formatHierarchy = (level = 1, parentId = null) => {
    const rowsByParent = rowsByLevel.get(level);
    const rows = (rowsByParent && rowsByParent.get(parentId)) || [];
    return rows.map(row => {
      const children = formatHierarchy(level + 1, row.cat_id);
      return {
        name: row.name,
        ...(children.length > 0 ? { subcategories: children } : {})
      };
    });
  };

  // Group theme names under their parent id, then attach them to each
  // top-level theme; `subthemes` is omitted when a theme has none.
  const subthemeNamesByParent = new Map();
  for (const row of themes) {
    const names = subthemeNamesByParent.get(row.master_cat_id);
    if (names) {
      names.push(row.name);
    } else {
      subthemeNamesByParent.set(row.master_cat_id, [row.name]);
    }
  }
  const formattedThemes = themes
    .filter(row => row.level_order === 1)
    .map(row => {
      const subthemes = subthemeNamesByParent.get(row.cat_id) || [];
      return {
        name: row.name,
        ...(subthemes.length > 0 ? { subthemes } : {})
      };
    });

  cache.taxonomyData = {
    categories: formatHierarchy(),
    themes: formattedThemes,
    // Guarded like taxCodes/sizeCategories so a missing result set yields [].
    colors: (colors || []).map(c => c.name),
    taxCodes: (taxCodes || []).map(tc => ({ id: tc.tax_code_id, name: tc.name })),
    sizeCategories: (sizeCategories || []).map(sc => ({ id: sc.cat_id, name: sc.name }))
  };
  cache.lastUpdated = Date.now();
  return cache.taxonomyData;
}
/**
 * Build the validation prompt: the text of prompts/product-validation.txt
 * followed by the current taxonomy lists.
 *
 * A cached prompt is returned while the cache is valid. Otherwise the file is
 * read, the taxonomy is fetched, and the combined text is stored in the cache
 * with a fresh timestamp.
 *
 * @param {object} pool - Database pool handed through to getTaxonomyData.
 * @returns {Promise<string>} Prompt text ending with the lead-in line for the
 *   product data.
 */
async function loadPrompt(pool) {
  const hasFreshPrompt = cache.validationPrompt && isCacheValid();
  if (hasFreshPrompt) {
    return cache.validationPrompt;
  }

  const promptFile = path.join(__dirname, '..', 'prompts', 'product-validation.txt');
  const promptText = await fs.readFile(promptFile, 'utf8');

  const { categories, themes, colors, taxCodes, sizeCategories } = await getTaxonomyData(pool);

  // Each list is embedded as compact JSON under its own heading.
  const taxonomySection = `
Available Categories:
${JSON.stringify(categories)}
Available Themes:
${JSON.stringify(themes)}
Available Colors:
${JSON.stringify(colors)}
Available Tax Codes:
${JSON.stringify(taxCodes)}
Available Size Categories:
${JSON.stringify(sizeCategories)}
Here is the product data to validate:`;

  cache.validationPrompt = [promptText, taxonomySection].join('\n');
  cache.lastUpdated = Date.now();
  return cache.validationPrompt;
}
// Set up cache clearing interval: clearCache runs every CACHE_TTL milliseconds
// for as long as the module is loaded. The timer is independent of when the
// cache was last filled, so an entry can be dropped before it is a full TTL old.
setInterval(clearCache, CACHE_TTL);
router.post('/validate', async (req, res) => { router.post('/validate', async (req, res) => {
try { try {
const { products } = req.body; const { products } = req.body;
@@ -40,20 +258,22 @@ router.post('/validate', async (req, res) => {
return res.status(400).json({ error: 'Products must be an array' }); return res.status(400).json({ error: 'Products must be an array' });
} }
const prompt = createValidationPrompt(products); // Load the prompt and append the products data
console.log('📝 Generated prompt:', prompt); const basePrompt = await loadPrompt(req.app.locals.pool);
const fullPrompt = basePrompt + '\n' + JSON.stringify(products, null, 2);
console.log('📝 Generated prompt:', fullPrompt);
console.log('🤖 Sending request to OpenAI...'); console.log('🤖 Sending request to OpenAI...');
const completion = await openai.chat.completions.create({ const completion = await openai.chat.completions.create({
model: "gpt-4o-mini", model: "gpt-4-turbo-preview",
messages: [ messages: [
{ {
role: "system", role: "system",
content: "You are a product data validation assistant that helps ensure product data is accurate, consistent, and properly formatted." content: "You are a specialized e-commerce product data processor for a crafting supplies website tasked with providing complete, correct, appealing, and SEO-friendly product listings. You should write professionally, but in a friendly and engaging tone."
}, },
{ {
role: "user", role: "user",
content: prompt content: fullPrompt
} }
], ],
temperature: 0.3, temperature: 0.3,

View File

@@ -17,6 +17,7 @@ import { Vendors } from '@/pages/Vendors';
import { Categories } from '@/pages/Categories'; import { Categories } from '@/pages/Categories';
import { Import } from '@/pages/Import'; import { Import } from '@/pages/Import';
import { ChakraProvider } from '@chakra-ui/react'; import { ChakraProvider } from '@chakra-ui/react';
import { AiValidationDebug } from "@/pages/AiValidationDebug"
const queryClient = new QueryClient(); const queryClient = new QueryClient();
@@ -71,6 +72,7 @@ function App() {
<Route path="/analytics" element={<Analytics />} /> <Route path="/analytics" element={<Analytics />} />
<Route path="/settings" element={<Settings />} /> <Route path="/settings" element={<Settings />} />
<Route path="/forecasting" element={<Forecasting />} /> <Route path="/forecasting" element={<Forecasting />} />
<Route path="/ai-validation/debug" element={<AiValidationDebug />} />
<Route path="*" element={<Navigate to="/" replace />} /> <Route path="*" element={<Navigate to="/" replace />} />
</Route> </Route>
</Routes> </Routes>

View File

@@ -0,0 +1,206 @@
import { useEffect, useState } from "react"
import { Button } from "@/components/ui/button"
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"
import { ScrollArea } from "@/components/ui/scroll-area"
import { Code } from "@/components/ui/code"
import { useToast } from "@/hooks/use-toast"
import { Loader2 } from "lucide-react"
import config from "@/config"
/** Cache state as reported under `cacheStatus` by the /ai-validation/debug endpoint. */
interface CacheStatus {
isCacheValid: boolean
// ISO-8601 timestamp of the last cache write, or null when nothing is cached.
lastUpdated: string | null
// Preformatted text from the server: either "<n> seconds" or "expired".
timeUntilExpiry: string
}
/** Item counts for each taxonomy list included in the prompt. */
interface TaxonomyStats {
// Counts every node in the category tree, nested subcategories included.
categories: number
themes: number
colors: number
taxCodes: number
sizeCategories: number
}
/** Full response body of the /ai-validation/debug endpoint. */
interface DebugData {
cacheStatus: CacheStatus
taxonomyStats: TaxonomyStats | null
// Prompt text without any product data appended.
basePrompt: string
// basePrompt followed by a one-item sample product array.
sampleFullPrompt: string
// Character length of sampleFullPrompt.
promptLength: number
}
/**
 * Debug page for the AI validation prompt.
 *
 * Loads the /ai-validation/debug payload on mount and shows cache status,
 * taxonomy counts, an estimated token count and cost for the sample prompt,
 * and the full sample prompt text. Two buttons re-fetch the data or force the
 * server to rebuild its cache.
 *
 * The cost-per-million rate lives in component state and the cost is derived
 * during render, so the figure always matches both the latest prompt length
 * and the rate last entered (no direct DOM writes).
 */
export function AiValidationDebug() {
  const [isLoading, setIsLoading] = useState(false)
  const [debugData, setDebugData] = useState<DebugData | null>(null)
  // Last valid rate typed into the input, in dollars per million tokens.
  const [costPerMillion, setCostPerMillion] = useState(3)
  const { toast } = useToast()

  const fetchDebugData = async () => {
    setIsLoading(true)
    try {
      const response = await fetch(`${config.apiUrl}/ai-validation/debug`)
      if (!response.ok) {
        throw new Error('Failed to fetch debug data')
      }
      const data = await response.json()
      setDebugData(data)
    } catch (error) {
      console.error('Error fetching debug data:', error)
      toast({
        variant: "destructive",
        title: "Error",
        description: error instanceof Error ? error.message : "Failed to fetch debug data"
      })
    } finally {
      setIsLoading(false)
    }
  }

  const refreshCache = async () => {
    if (!confirm('Are you sure you want to refresh the cache?')) return
    setIsLoading(true)
    try {
      const response = await fetch(`${config.apiUrl}/ai-validation/refresh-cache`, {
        method: 'POST'
      })
      if (!response.ok) {
        throw new Error('Failed to refresh cache')
      }
      const data = await response.json()
      if (data.success) {
        toast({
          title: "Success",
          description: "Cache refreshed successfully"
        })
        fetchDebugData()
      } else {
        throw new Error(data.error || 'Failed to refresh cache')
      }
    } catch (error) {
      console.error('Error refreshing cache:', error)
      toast({
        variant: "destructive",
        title: "Error",
        description: error instanceof Error ? error.message : "Failed to refresh cache"
      })
    } finally {
      setIsLoading(false)
    }
  }

  // Load once on mount.
  useEffect(() => {
    fetchDebugData()
  }, [])

  // Rough estimate: about four characters per token.
  const estimatedTokens = debugData ? Math.round(debugData.promptLength / 4) : 0
  // Dollars per million tokens, converted to cents for display.
  const estimatedCostCents = (estimatedTokens / 1_000_000) * costPerMillion * 100

  return (
    <div className="container mx-auto py-6 space-y-6">
      <div className="flex items-center justify-between">
        <h1 className="text-3xl font-bold tracking-tight">AI Validation Debug</h1>
        <div className="space-x-4">
          <Button
            variant="outline"
            onClick={fetchDebugData}
            disabled={isLoading}
          >
            {isLoading && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
            Refresh Data
          </Button>
          <Button
            variant="outline"
            onClick={refreshCache}
            disabled={isLoading}
          >
            {isLoading && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
            Force Cache Refresh
          </Button>
        </div>
      </div>
      {debugData && (
        <div className="grid grid-cols-1 md:grid-cols-3 gap-6">
          <Card>
            <CardHeader>
              <CardTitle>Cache Status</CardTitle>
            </CardHeader>
            <CardContent>
              <div className="space-y-2">
                <div>Valid: {debugData.cacheStatus.isCacheValid ? "Yes" : "No"}</div>
                <div>Last Updated: {debugData.cacheStatus.lastUpdated || "never"}</div>
                <div>Expires in: {debugData.cacheStatus.timeUntilExpiry}</div>
              </div>
            </CardContent>
          </Card>
          <Card>
            <CardHeader>
              <CardTitle>Taxonomy Stats</CardTitle>
            </CardHeader>
            <CardContent>
              {debugData.taxonomyStats ? (
                <div className="space-y-2">
                  <div>Categories: {debugData.taxonomyStats.categories}</div>
                  <div>Themes: {debugData.taxonomyStats.themes}</div>
                  <div>Colors: {debugData.taxonomyStats.colors}</div>
                  <div>Tax Codes: {debugData.taxonomyStats.taxCodes}</div>
                  <div>Size Categories: {debugData.taxonomyStats.sizeCategories}</div>
                </div>
              ) : (
                <div>No taxonomy data available</div>
              )}
            </CardContent>
          </Card>
          <Card>
            <CardHeader>
              <CardTitle>Prompt Length</CardTitle>
            </CardHeader>
            <CardContent>
              <div className="space-y-4">
                <div className="space-y-2">
                  <div>Characters: {debugData.promptLength}</div>
                  <div>Tokens (est.): ~{estimatedTokens}</div>
                </div>
                <div className="space-y-2">
                  <label htmlFor="costPerMillion" className="text-sm text-muted-foreground">
                    Cost per million tokens ($)
                  </label>
                  <input
                    id="costPerMillion"
                    type="number"
                    className="w-full px-3 py-2 border rounded-md"
                    defaultValue="3"
                    onChange={(e) => {
                      // Ignore empty or partial input so the last valid rate stays in effect.
                      const parsed = parseFloat(e.target.value)
                      if (!Number.isNaN(parsed)) {
                        setCostPerMillion(parsed)
                      }
                    }}
                  />
                  <div className="text-sm">
                    Cost: <span id="tokenCost">{estimatedCostCents.toFixed(1)}</span>¢
                  </div>
                </div>
              </div>
            </CardContent>
          </Card>
          <Card className="col-span-full">
            <CardHeader>
              <CardTitle>Full Sample Prompt</CardTitle>
            </CardHeader>
            <CardContent>
              <ScrollArea className="h-[500px] w-full rounded-md border p-4">
                <Code className="whitespace-pre-wrap">{debugData.sampleFullPrompt}</Code>
              </ScrollArea>
            </CardContent>
          </Card>
        </div>
      )}
    </div>
  )
}

View File

@@ -27,7 +27,7 @@ const BASE_IMPORT_FIELDS = [
label: "UPC", label: "UPC",
key: "upc", key: "upc",
description: "Universal Product Code/Barcode", description: "Universal Product Code/Barcode",
alternateMatches: ["upc","UPC","barcode", "bar code", "JAN", "EAN"], alternateMatches: ["barcode", "bar code", "jan", "ean"],
fieldType: { type: "input" }, fieldType: { type: "input" },
width: 140, width: 140,
validations: [ validations: [
@@ -94,7 +94,7 @@ const BASE_IMPORT_FIELDS = [
label: "MSRP", label: "MSRP",
key: "msrp", key: "msrp",
description: "Manufacturer's Suggested Retail Price", description: "Manufacturer's Suggested Retail Price",
alternateMatches: ["retail", "retail price", "sugg retail", "price", "sugg. Retail","msrp","MSRP"], alternateMatches: ["retail", "retail price", "sugg retail", "price", "sugg. retail","default price"],
fieldType: { fieldType: {
type: "input", type: "input",
price: true price: true
@@ -136,7 +136,7 @@ const BASE_IMPORT_FIELDS = [
label: "Case Pack", label: "Case Pack",
key: "case_qty", key: "case_qty",
description: "Number of units per case", description: "Number of units per case",
alternateMatches: ["mc qty","MC Qty","case qty","Case Qty"], alternateMatches: ["mc qty","case qty","case pack"],
fieldType: { type: "input" }, fieldType: { type: "input" },
width: 50, width: 50,
validations: [ validations: [
@@ -208,6 +208,7 @@ const BASE_IMPORT_FIELDS = [
label: "Weight", label: "Weight",
key: "weight", key: "weight",
description: "Product weight (in lbs)", description: "Product weight (in lbs)",
alternateMatches: ["weight (lbs.)"],
fieldType: { type: "input" }, fieldType: { type: "input" },
width: 100, width: 100,
validations: [ validations: [
@@ -295,6 +296,7 @@ const BASE_IMPORT_FIELDS = [
label: "Description", label: "Description",
key: "description", key: "description",
description: "Detailed product description", description: "Detailed product description",
alternateMatches: ["details/description"],
fieldType: { fieldType: {
type: "input", type: "input",
multiline: true multiline: true