Add in forecasting, lifecycle phases, associated component and script changes

This commit is contained in:
2026-02-13 22:45:18 -05:00
parent f41b5ab0f6
commit 45ded53530
29 changed files with 3643 additions and 376 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
numpy>=1.24
scipy>=1.10
pandas>=2.0
psycopg2-binary>=2.9
statsmodels>=0.14

View File

@@ -0,0 +1,128 @@
#!/usr/bin/env node
/**
* Forecast Pipeline Orchestrator
*
* Spawns the Python forecast engine with database credentials from the
* environment. Can be run manually, via cron, or integrated into the
* existing metrics pipeline.
*
* Usage:
* node run_forecast.js
*
* Environment:
* Reads DB_HOST, DB_USER, DB_PASSWORD, DB_NAME, DB_PORT from
* /var/www/html/inventory/.env (or current process env).
*/
const { spawn } = require('child_process');
const path = require('path');
const fs = require('fs');
// Load .env file if it exists (production path)
const envPaths = [
  '/var/www/html/inventory/.env',
  path.join(__dirname, '../../.env'),
];
for (const envPath of envPaths) {
  if (fs.existsSync(envPath)) {
    const envContent = fs.readFileSync(envPath, 'utf-8');
    for (const line of envContent.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed || trimmed.startsWith('#')) continue;
      const eqIndex = trimmed.indexOf('=');
      if (eqIndex === -1) continue;
      // Trim the key so `KEY = value` doesn't produce a key with trailing spaces.
      const key = trimmed.slice(0, eqIndex).trim();
      let value = trimmed.slice(eqIndex + 1).trim();
      // Strip one pair of matching surrounding quotes — .env files commonly
      // write KEY="value" / KEY='value'; a raw slice would keep the quotes.
      if (
        value.length >= 2 &&
        ((value.startsWith('"') && value.endsWith('"')) ||
          (value.startsWith("'") && value.endsWith("'")))
      ) {
        value = value.slice(1, -1);
      }
      // Real process environment always wins over .env contents.
      if (!process.env[key]) {
        process.env[key] = value;
      }
    }
    console.log(`Loaded env from ${envPath}`);
    break;
  }
}
// Fail fast when DB credentials are absent — the Python engine needs them.
const required = ['DB_HOST', 'DB_USER', 'DB_PASSWORD', 'DB_NAME'];
const missing = required.filter(k => !process.env[k]);
if (missing.length > 0) {
  console.error(`Missing required environment variables: ${missing.join(', ')}`);
  process.exit(1);
}
// Paths to the Python engine and its isolated virtualenv, colocated with this script.
const SCRIPT_DIR = __dirname;
const PYTHON_SCRIPT = path.join(SCRIPT_DIR, 'forecast_engine.py');
const VENV_DIR = path.join(SCRIPT_DIR, 'venv');
const REQUIREMENTS = path.join(SCRIPT_DIR, 'requirements.txt');
// Resolve the Python interpreter to use: the venv's binary when one has
// been created, otherwise the system-wide `python3`.
function getPythonBin() {
  const venvPython = path.join(VENV_DIR, 'bin', 'python');
  return fs.existsSync(venvPython) ? venvPython : 'python3';
}
// Create the virtualenv on first run, then (re)install requirements.
// pip install is idempotent and fast when packages are already present,
// so it is safe to run unconditionally on every invocation.
async function ensureDependencies() {
  const venvPython = path.join(VENV_DIR, 'bin', 'python');
  if (!fs.existsSync(venvPython)) {
    console.log('Creating virtual environment...');
    await runCommand('python3', ['-m', 'venv', VENV_DIR]);
  }
  console.log('Checking dependencies...');
  await runCommand(venvPython, ['-m', 'pip', 'install', '--quiet', '-r', REQUIREMENTS]);
}
// Promise wrapper around child_process.spawn. stdio is inherited so the
// child's output streams straight to this process's console. Resolves on
// exit code 0; rejects on a non-zero exit or a spawn failure (e.g. ENOENT).
function runCommand(cmd, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, { stdio: 'inherit', ...options });
    child.on('error', reject);
    child.on('close', exitCode => {
      if (exitCode !== 0) {
        reject(new Error(`${cmd} exited with code ${exitCode}`));
        return;
      }
      resolve();
    });
  });
}
// Entry point: make sure the Python environment is ready, run the forecast
// engine, and report wall-clock duration. Exits non-zero on any failure so
// cron / monitoring can detect a broken run.
async function main() {
  const startedAt = Date.now();
  const rule = '='.repeat(60);
  const elapsed = () => ((Date.now() - startedAt) / 1000).toFixed(1);
  console.log(rule);
  console.log(`Forecast Pipeline - ${new Date().toISOString()}`);
  console.log(rule);
  try {
    await ensureDependencies();
    const pythonBin = getPythonBin();
    console.log(`Using Python: ${pythonBin}`);
    console.log(`Running: ${PYTHON_SCRIPT}`);
    console.log('');
    await runCommand(pythonBin, [PYTHON_SCRIPT], {
      env: {
        ...process.env,
        PYTHONUNBUFFERED: '1', // Real-time output
      },
    });
    console.log('');
    console.log(rule);
    console.log(`Forecast pipeline completed in ${elapsed()}s`);
    console.log(rule);
  } catch (err) {
    console.error(`Forecast pipeline FAILED after ${elapsed()}s:`, err.message);
    process.exit(1);
  }
}
main();

View File

@@ -0,0 +1,51 @@
-- Forecasting Pipeline Tables
-- Run once to create the schema. Safe to re-run (IF NOT EXISTS).
-- Precomputed reference decay curves per brand (or brand x category at any hierarchy level)
CREATE TABLE IF NOT EXISTS brand_lifecycle_curves (
    id SERIAL PRIMARY KEY,
    brand TEXT NOT NULL,
    root_category TEXT,                     -- NULL = brand-level fallback curve, else category name
    cat_id BIGINT,                          -- NULL = brand-only; else category_hierarchy.cat_id for precise matching
    category_level SMALLINT,                -- NULL = brand-only; 0-3 = hierarchy depth
    amplitude NUMERIC(10,4),                -- A in: sales(t) = A * exp(-λt) + C
    decay_rate NUMERIC(10,6),               -- λ (higher = faster decay)
    baseline NUMERIC(10,4),                 -- C (long-tail steady-state daily sales)
    r_squared NUMERIC(6,4),                 -- goodness of fit
    sample_size INT,                        -- number of products that informed this curve
    median_first_week_sales NUMERIC(10,2),  -- for scaling new launches
    median_preorder_sales NUMERIC(10,2),    -- for scaling pre-order products
    median_preorder_days NUMERIC(10,2),     -- median pre-order accumulation window (days)
    computed_at TIMESTAMP DEFAULT NOW(),
    UNIQUE(brand, cat_id)
);
-- PostgreSQL UNIQUE treats NULLs as distinct, so UNIQUE(brand, cat_id) does
-- NOT prevent duplicate brand-level fallback rows (cat_id IS NULL). Enforce
-- at most one fallback curve per brand with a partial unique index
-- (IF NOT EXISTS keeps this safe to re-run, matching the rest of the file).
CREATE UNIQUE INDEX IF NOT EXISTS idx_blc_brand_fallback
    ON brand_lifecycle_curves(brand) WHERE cat_id IS NULL;
-- Per-product daily forecasts (next 90 days, regenerated each run)
CREATE TABLE IF NOT EXISTS product_forecasts (
    pid BIGINT NOT NULL,                  -- product id
    forecast_date DATE NOT NULL,          -- day this row forecasts
    forecast_units NUMERIC(10,2),         -- predicted units sold
    forecast_revenue NUMERIC(14,4),       -- predicted revenue
    lifecycle_phase TEXT,                 -- preorder, launch, decay, mature, slow_mover, dormant
    forecast_method TEXT,                 -- lifecycle_curve, exp_smoothing, velocity, zero
    confidence_lower NUMERIC(10,2),       -- lower bound of the confidence interval
    confidence_upper NUMERIC(10,2),       -- upper bound of the confidence interval
    generated_at TIMESTAMP DEFAULT NOW(),
    PRIMARY KEY (pid, forecast_date)
);
-- Query paths: "all forecasts for a given day" and "all products in a phase".
CREATE INDEX IF NOT EXISTS idx_pf_date ON product_forecasts(forecast_date);
CREATE INDEX IF NOT EXISTS idx_pf_phase ON product_forecasts(lifecycle_phase);
-- Forecast run history (for monitoring)
CREATE TABLE IF NOT EXISTS forecast_runs (
    id SERIAL PRIMARY KEY,
    started_at TIMESTAMP NOT NULL,
    finished_at TIMESTAMP,           -- NULL while the run is still in flight
    status TEXT DEFAULT 'running',   -- running, completed, failed
    products_forecast INT,           -- number of products forecast this run
    phase_counts JSONB,              -- {"launch": 50, "decay": 200, ...}
    curve_count INT,                 -- brand curves computed
    error_message TEXT,              -- populated when status = 'failed'
    duration_seconds NUMERIC(10,2)
);