Add chat page and chat server

2025-06-14 13:36:31 -04:00
parent 00501704df
commit b2330dee22
17 changed files with 3039 additions and 192 deletions
--- a/inventory-server/chat/db-convert/mongo_to_postgres_converter.py
+++ b/inventory-server/chat/db-convert/mongo_to_postgres_converter.py
@@ -0,0 +1,881 @@
+#!/usr/bin/env python3
+"""
+MongoDB to PostgreSQL Converter for Rocket.Chat
+Converts MongoDB BSON export files to PostgreSQL database
+
+Usage:
+    python3 mongo_to_postgres_converter.py \
+        --mongo-path db/database/62df06d44234d20001289144 \
+        --pg-database rocketchat_converted \
+        --pg-user rocketchat_user \
+        --pg-password your_password \
+        --debug
+"""
+
+import json
+import os
+import re
+import subprocess
+import sys
+import struct
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Any, List, Optional
+import argparse
+import traceback
+
+# Auto-install dependencies if needed
+try:
+    import bson
+    import psycopg2
+except ImportError:
+    print("Installing required packages...")
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "pymongo", "psycopg2-binary"])
+    import bson
+    import psycopg2
+
+class MongoToPostgresConverter:
+    def __init__(self, mongo_db_path: str, postgres_config: Dict[str, str], debug_mode: bool = False, debug_collections: List[str] = None):
+        self.mongo_db_path = Path(mongo_db_path)
+        self.postgres_config = postgres_config
+        self.debug_mode = debug_mode
+        self.debug_collections = debug_collections or []
+        self.collections = {}
+        self.schema_info = {}
+        self.error_log = {}
+        
+    def log_debug(self, message: str, collection: str = None):
+        """Log debug messages if debug mode is enabled and collection is in debug list"""
+        if self.debug_mode and (not self.debug_collections or collection in self.debug_collections):
+            print(f"DEBUG: {message}")
+            
+    def log_error(self, collection: str, error_type: str, details: str):
+        """Log detailed error information"""
+        if collection not in self.error_log:
+            self.error_log[collection] = []
+        self.error_log[collection].append({
+            'type': error_type,
+            'details': details,
+            'timestamp': datetime.now().isoformat()
+        })
+        
+    def sample_documents(self, collection_name: str, max_samples: int = 3) -> List[Dict]:
+        """Sample documents from a collection for debugging"""
+        if not self.debug_mode or (self.debug_collections and collection_name not in self.debug_collections):
+            return []
+            
+        print(f"\n🔍 Sampling documents from {collection_name}:")
+        
+        bson_file = self.collections[collection_name]['bson_file']
+        if bson_file.stat().st_size == 0:
+            print("  Collection is empty")
+            return []
+            
+        samples = []
+        
+        try:
+            with open(bson_file, 'rb') as f:
+                sample_count = 0
+                while sample_count < max_samples:
+                    try:
+                        doc_size = int.from_bytes(f.read(4), byteorder='little')
+                        if doc_size <= 0:
+                            break
+                        f.seek(-4, 1)
+                        doc_bytes = f.read(doc_size)
+                        if len(doc_bytes) != doc_size:
+                            break
+                            
+                        doc = bson.decode(doc_bytes)
+                        samples.append(doc)
+                        sample_count += 1
+                        
+                        print(f"  Sample {sample_count} - Keys: {list(doc.keys())}")
+                        # Show a few key fields with their types and truncated values
+                        for key, value in list(doc.items())[:3]:
+                            value_preview = str(value)[:50] + "..." if len(str(value)) > 50 else str(value)
+                            print(f"    {key}: {type(value).__name__} = {value_preview}")
+                        if len(doc) > 3:
+                            print(f"    ... and {len(doc) - 3} more fields")
+                        print()
+                        
+                    except (bson.InvalidBSON, struct.error, OSError) as e:
+                        self.log_error(collection_name, 'document_parsing', str(e))
+                        break
+                        
+        except Exception as e:
+            self.log_error(collection_name, 'file_reading', str(e))
+            print(f"  Error reading collection: {e}")
+            
+        return samples
+
+    def discover_collections(self):
+        """Discover all BSON files and their metadata"""
+        print("Discovering MongoDB collections...")
+        
+        for bson_file in self.mongo_db_path.glob("*.bson"):
+            collection_name = bson_file.stem
+            metadata_file = bson_file.with_suffix(".metadata.json")
+            
+            # Read metadata if available
+            metadata = {}
+            if metadata_file.exists():
+                try:
+                    with open(metadata_file, 'r', encoding='utf-8') as f:
+                        metadata = json.load(f)
+                except (UnicodeDecodeError, json.JSONDecodeError) as e:
+                    print(f"Warning: Could not read metadata for {collection_name}: {e}")
+                    metadata = {}
+            
+            # Get file size and document count estimate
+            file_size = bson_file.stat().st_size
+            doc_count = self._estimate_document_count(bson_file)
+            
+            self.collections[collection_name] = {
+                'bson_file': bson_file,
+                'metadata': metadata,
+                'file_size': file_size,
+                'estimated_docs': doc_count
+            }
+            
+        print(f"Found {len(self.collections)} collections")
+        for name, info in self.collections.items():
+            print(f"  - {name}: {info['file_size']/1024/1024:.1f}MB (~{info['estimated_docs']} docs)")
+    
+    def _estimate_document_count(self, bson_file: Path) -> int:
+        """Estimate document count by reading first few documents"""
+        if bson_file.stat().st_size == 0:
+            return 0
+            
+        try:
+            with open(bson_file, 'rb') as f:
+                docs_sampled = 0
+                bytes_sampled = 0
+                max_sample_size = min(1024 * 1024, bson_file.stat().st_size)  # 1MB or file size
+                
+                while bytes_sampled < max_sample_size:
+                    try:
+                        doc_size = int.from_bytes(f.read(4), byteorder='little')
+                        if doc_size <= 0 or doc_size > 16 * 1024 * 1024:  # MongoDB doc size limit
+                            break
+                        f.seek(-4, 1)  # Go back
+                        doc_bytes = f.read(doc_size)
+                        if len(doc_bytes) != doc_size:
+                            break
+                        bson.decode(doc_bytes)  # Validate it's a valid BSON document
+                        docs_sampled += 1
+                        bytes_sampled += doc_size
+                    except (bson.InvalidBSON, struct.error, OSError):
+                        break
+                
+                if docs_sampled > 0 and bytes_sampled > 0:
+                    avg_doc_size = bytes_sampled / docs_sampled
+                    return int(bson_file.stat().st_size / avg_doc_size)
+                    
+        except Exception:
+            pass
+            
+        return 0
+    
+    def analyze_schema(self, collection_name: str, sample_size: int = 100) -> Dict[str, Any]:
+        """Analyze collection schema by sampling documents"""
+        print(f"Analyzing schema for {collection_name}...")
+        
+        bson_file = self.collections[collection_name]['bson_file']
+        if bson_file.stat().st_size == 0:
+            return {}
+            
+        schema = {}
+        docs_analyzed = 0
+        
+        try:
+            with open(bson_file, 'rb') as f:
+                while docs_analyzed < sample_size:
+                    try:
+                        doc_size = int.from_bytes(f.read(4), byteorder='little')
+                        if doc_size <= 0:
+                            break
+                        f.seek(-4, 1)
+                        doc_bytes = f.read(doc_size)
+                        if len(doc_bytes) != doc_size:
+                            break
+                            
+                        doc = bson.decode(doc_bytes)
+                        self._analyze_document_schema(doc, schema)
+                        docs_analyzed += 1
+                        
+                    except (bson.InvalidBSON, struct.error, OSError):
+                        break
+                        
+        except Exception as e:
+            print(f"Error analyzing {collection_name}: {e}")
+            
+        self.schema_info[collection_name] = schema
+        return schema
+    
+    def _analyze_document_schema(self, doc: Dict[str, Any], schema: Dict[str, Any], prefix: str = ""):
+        """Recursively analyze document structure"""
+        for key, value in doc.items():
+            full_key = f"{prefix}.{key}" if prefix else key
+            
+            if full_key not in schema:
+                schema[full_key] = {
+                    'types': set(),
+                    'null_count': 0,
+                    'total_count': 0,
+                    'is_array': False,
+                    'nested_schema': {}
+                }
+            
+            schema[full_key]['total_count'] += 1
+            
+            if value is None:
+                schema[full_key]['null_count'] += 1
+                schema[full_key]['types'].add('null')
+            elif isinstance(value, dict):
+                schema[full_key]['types'].add('object')
+                if 'nested_schema' not in schema[full_key]:
+                    schema[full_key]['nested_schema'] = {}
+                self._analyze_document_schema(value, schema[full_key]['nested_schema'])
+            elif isinstance(value, list):
+                schema[full_key]['types'].add('array')
+                schema[full_key]['is_array'] = True
+                if value and isinstance(value[0], dict):
+                    if 'array_item_schema' not in schema[full_key]:
+                        schema[full_key]['array_item_schema'] = {}
+                    for item in value[:5]:  # Sample first 5 items
+                        if isinstance(item, dict):
+                            self._analyze_document_schema(item, schema[full_key]['array_item_schema'])
+            else:
+                schema[full_key]['types'].add(type(value).__name__)
+    
+    def generate_postgres_schema(self) -> Dict[str, str]:
+        """Generate PostgreSQL CREATE TABLE statements"""
+        print("Generating PostgreSQL schema...")
+        
+        table_definitions = {}
+        
+        for collection_name, schema in self.schema_info.items():
+            if not schema:  # Empty collection
+                continue
+                
+            table_name = self._sanitize_table_name(collection_name)
+            columns = []
+            
+            # Always add an id column (PostgreSQL doesn't use _id like MongoDB)
+            columns.append("id SERIAL PRIMARY KEY")
+            
+            for field_name, field_info in schema.items():
+                if field_name == '_id':
+                    columns.append("mongo_id TEXT")  # Always allow NULL for mongo_id
+                    continue
+                    
+                col_name = self._sanitize_column_name(field_name)
+                
+                # Handle conflicts with PostgreSQL auto-generated columns
+                if col_name in ['id', 'mongo_id', 'created_at', 'updated_at']:
+                    col_name = f"field_{col_name}"
+                
+                col_type = self._determine_postgres_type(field_info)
+                
+                # Make all fields nullable by default to avoid constraint violations
+                columns.append(f"{col_name} {col_type}")
+            
+            # Add metadata columns
+            columns.extend([
+                "created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP",
+                "updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP"
+            ])
+            
+            column_definitions = ',\n    '.join(columns)
+            table_sql = f"""
+CREATE TABLE IF NOT EXISTS {table_name} (
+    {column_definitions}
+);
+
+-- Create indexes based on MongoDB indexes
+"""
+            
+            # Get list of actual columns that will exist in the table
+            existing_columns = set(['id', 'mongo_id', 'created_at', 'updated_at'])
+            for field_name in schema.keys():
+                if field_name != '_id':
+                    col_name = self._sanitize_column_name(field_name)
+                    # Handle conflicts with PostgreSQL auto-generated columns
+                    if col_name in ['id', 'mongo_id', 'created_at', 'updated_at']:
+                        col_name = f"field_{col_name}"
+                    existing_columns.add(col_name)
+            
+            # Add indexes from MongoDB metadata
+            metadata = self.collections[collection_name].get('metadata', {})
+            indexes = metadata.get('indexes', [])
+            
+            for index in indexes:
+                if index['name'] != '_id_':  # Skip the default _id index
+                    # Sanitize index name - remove special characters
+                    sanitized_index_name = re.sub(r'[^a-zA-Z0-9_]', '_', index['name'])
+                    index_name = f"idx_{table_name}_{sanitized_index_name}"
+                    index_keys = list(index['key'].keys())
+                    if index_keys:
+                        sanitized_keys = []
+                        for key in index_keys:
+                            if key != '_id':
+                                sanitized_key = self._sanitize_column_name(key)
+                                # Handle conflicts with PostgreSQL auto-generated columns
+                                if sanitized_key in ['id', 'mongo_id', 'created_at', 'updated_at']:
+                                    sanitized_key = f"field_{sanitized_key}"
+                                # Only add if the column actually exists in our table
+                                if sanitized_key in existing_columns:
+                                    sanitized_keys.append(sanitized_key)
+                        
+                        if sanitized_keys:
+                            table_sql += f"CREATE INDEX IF NOT EXISTS {index_name} ON {table_name} ({', '.join(sanitized_keys)});\n"
+            
+            table_definitions[collection_name] = table_sql
+            
+        return table_definitions
+    
+    def _sanitize_table_name(self, name: str) -> str:
+        """Convert MongoDB collection name to PostgreSQL table name"""
+        # Remove rocketchat_ prefix if present
+        if name.startswith('rocketchat_'):
+            name = name[11:]
+        
+        # Replace special characters with underscores
+        name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
+        
+        # Ensure it starts with a letter
+        if name and name[0].isdigit():
+            name = 'table_' + name
+            
+        return name.lower()
+    
+    def _sanitize_column_name(self, name: str) -> str:
+        """Convert MongoDB field name to PostgreSQL column name"""
+        # Handle nested field names (convert dots to underscores)
+        name = name.replace('.', '_')
+        
+        # Replace special characters with underscores
+        name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
+        
+        # Ensure it starts with a letter or underscore
+        if name and name[0].isdigit():
+            name = 'col_' + name
+            
+        # Handle PostgreSQL reserved words
+        reserved = {
+            'user', 'order', 'group', 'table', 'index', 'key', 'value', 'date', 'time', 'timestamp',
+            'default', 'select', 'from', 'where', 'insert', 'update', 'delete', 'create', 'drop',
+            'alter', 'grant', 'revoke', 'commit', 'rollback', 'begin', 'end', 'case', 'when',
+            'then', 'else', 'if', 'null', 'not', 'and', 'or', 'in', 'exists', 'between',
+            'like', 'limit', 'offset', 'union', 'join', 'inner', 'outer', 'left', 'right',
+            'full', 'cross', 'natural', 'on', 'using', 'distinct', 'all', 'any', 'some',
+            'desc', 'asc', 'primary', 'foreign', 'references', 'constraint', 'unique',
+            'check', 'cascade', 'restrict', 'action', 'match', 'partial', 'full'
+        }
+        if name.lower() in reserved:
+            name = name + '_col'
+            
+        return name.lower()
+    
+    def _determine_postgres_type(self, field_info: Dict[str, Any]) -> str:
+        """Determine PostgreSQL column type from MongoDB field analysis with improved logic"""
+        types = field_info['types']
+        
+        # Convert set to list for easier checking
+        type_list = list(types)
+        
+        # If there's only one type (excluding null), use specific typing
+        non_null_types = [t for t in type_list if t != 'null']
+        
+        if len(non_null_types) == 1:
+            single_type = non_null_types[0]
+            if single_type == 'bool':
+                return 'BOOLEAN'
+            elif single_type == 'int':
+                return 'INTEGER'
+            elif single_type == 'float':
+                return 'NUMERIC'
+            elif single_type == 'str':
+                return 'TEXT'
+            elif single_type == 'datetime':
+                return 'TIMESTAMP'
+            elif single_type == 'ObjectId':
+                return 'TEXT'
+        
+        # Handle mixed types more conservatively
+        if 'array' in types or field_info.get('is_array', False):
+            return 'JSONB'  # Arrays always go to JSONB
+        elif 'object' in types:
+            return 'JSONB'  # Objects always go to JSONB
+        elif len(non_null_types) > 1:
+            # Multiple non-null types - check for common combinations
+            if set(non_null_types) <= {'int', 'float'}:
+                return 'NUMERIC'  # Can handle both int and float
+            elif set(non_null_types) <= {'bool', 'str'}:
+                return 'TEXT'  # Convert everything to text
+            elif set(non_null_types) <= {'str', 'ObjectId'}:
+                return 'TEXT'  # Both are string-like
+            else:
+                return 'JSONB'  # Complex mixed types go to JSONB
+        elif 'ObjectId' in types:
+            return 'TEXT'
+        elif 'datetime' in types:
+            return 'TIMESTAMP'
+        elif 'bool' in types:
+            return 'BOOLEAN'
+        elif 'int' in types:
+            return 'INTEGER'
+        elif 'float' in types:
+            return 'NUMERIC'
+        elif 'str' in types:
+            return 'TEXT'
+        else:
+            return 'TEXT'  # Default fallback
+    
+    def create_postgres_database(self, table_definitions: Dict[str, str]):
+        """Create PostgreSQL database and tables"""
+        print("Creating PostgreSQL database schema...")
+        
+        try:
+            # Connect to PostgreSQL
+            conn = psycopg2.connect(**self.postgres_config)
+            conn.autocommit = True
+            cursor = conn.cursor()
+            
+            # Create tables
+            for collection_name, table_sql in table_definitions.items():
+                print(f"Creating table for {collection_name}...")
+                cursor.execute(table_sql)
+            
+            cursor.close()
+            conn.close()
+            print("Database schema created successfully!")
+            
+        except Exception as e:
+            print(f"Error creating database schema: {e}")
+            raise
+    
+    def convert_and_insert_data(self, batch_size: int = 1000):
+        """Convert BSON data and insert into PostgreSQL"""
+        print("Converting and inserting data...")
+        
+        try:
+            conn = psycopg2.connect(**self.postgres_config)
+            conn.autocommit = False
+            
+            for collection_name in self.collections:
+                print(f"Processing {collection_name}...")
+                self._convert_collection(conn, collection_name, batch_size)
+            
+            conn.close()
+            print("Data conversion completed successfully!")
+            
+        except Exception as e:
+            print(f"Error converting data: {e}")
+            raise
+    
+    def _convert_collection(self, conn, collection_name: str, batch_size: int):
+        """Convert a single collection"""
+        bson_file = self.collections[collection_name]['bson_file']
+        
+        if bson_file.stat().st_size == 0:
+            print(f"  Skipping empty collection {collection_name}")
+            return
+            
+        table_name = self._sanitize_table_name(collection_name)
+        cursor = conn.cursor()
+        
+        batch = []
+        total_inserted = 0
+        errors = 0
+        
+        try:
+            with open(bson_file, 'rb') as f:
+                while True:
+                    try:
+                        doc_size = int.from_bytes(f.read(4), byteorder='little')
+                        if doc_size <= 0:
+                            break
+                        f.seek(-4, 1)
+                        doc_bytes = f.read(doc_size)
+                        if len(doc_bytes) != doc_size:
+                            break
+                            
+                        doc = bson.decode(doc_bytes)
+                        batch.append(doc)
+                        
+                        if len(batch) >= batch_size:
+                            inserted, batch_errors = self._insert_batch(cursor, table_name, batch, collection_name)
+                            total_inserted += inserted
+                            errors += batch_errors
+                            batch = []
+                            conn.commit()
+                            if total_inserted % 5000 == 0:  # Less frequent progress updates
+                                print(f"  Inserted {total_inserted} documents...")
+                            
+                    except (bson.InvalidBSON, struct.error, OSError):
+                        break
+                
+                # Insert remaining documents
+                if batch:
+                    inserted, batch_errors = self._insert_batch(cursor, table_name, batch, collection_name)
+                    total_inserted += inserted
+                    errors += batch_errors
+                    conn.commit()
+                    
+            if errors > 0:
+                print(f"  Completed {collection_name}: {total_inserted} documents inserted ({errors} errors)")
+            else:
+                print(f"  Completed {collection_name}: {total_inserted} documents inserted")
+            
+        except Exception as e:
+            print(f"  Error processing {collection_name}: {e}")
+            conn.rollback()
+        finally:
+            cursor.close()
+    
+    def _insert_batch(self, cursor, table_name: str, documents: List[Dict], collection_name: str):
+        """Insert a batch of documents with proper transaction handling"""
+        if not documents:
+            return 0, 0
+            
+        # Get schema info for this collection
+        schema = self.schema_info.get(collection_name, {})
+        
+        # Build column list
+        columns = ['mongo_id']
+        for field_name in schema.keys():
+            if field_name != '_id':
+                col_name = self._sanitize_column_name(field_name)
+                # Handle conflicts with PostgreSQL auto-generated columns
+                if col_name in ['id', 'mongo_id', 'created_at', 'updated_at']:
+                    col_name = f"field_{col_name}"
+                columns.append(col_name)
+        
+        # Build INSERT statement
+        placeholders = ', '.join(['%s'] * len(columns))
+        sql = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
+        
+        self.log_debug(f"SQL: {sql}", collection_name)
+        
+        # Convert documents to tuples
+        rows = []
+        errors = 0
+        
+        for doc_idx, doc in enumerate(documents):
+            try:
+                row = []
+                
+                # Add mongo_id
+                row.append(str(doc.get('_id', '')))
+                
+                # Add other fields
+                for field_name in schema.keys():
+                    if field_name != '_id':
+                        try:
+                            value = self._get_nested_value(doc, field_name)
+                            converted_value = self._convert_value_for_postgres(value, field_name, schema)
+                            row.append(converted_value)
+                        except Exception as e:
+                            self.log_error(collection_name, 'field_conversion', 
+                                         f"Field '{field_name}' in doc {doc_idx}: {str(e)}")
+                            # Only show debug for collections we're focusing on
+                            if collection_name in self.debug_collections:
+                                print(f"    ⚠️  Error converting field '{field_name}': {e}")
+                            row.append(None)  # Use NULL for problematic fields
+                
+                rows.append(tuple(row))
+                
+            except Exception as e:
+                self.log_error(collection_name, 'document_conversion', f"Document {doc_idx}: {str(e)}")
+                errors += 1
+                continue
+        
+        # Execute batch insert
+        if rows:
+            try:
+                cursor.executemany(sql, rows)
+                return len(rows), errors
+            except Exception as batch_error:
+                self.log_error(collection_name, 'batch_insert', str(batch_error))
+                
+                # Only show detailed debugging for targeted collections
+                if collection_name in self.debug_collections:
+                    print(f"    🔴 Batch insert failed for {collection_name}: {batch_error}")
+                    print("    Trying individual inserts with rollback handling...")
+                
+                # Rollback the failed transaction
+                cursor.connection.rollback()
+                
+                # Try inserting one by one in individual transactions
+                success_count = 0
+                for row_idx, row in enumerate(rows):
+                    try:
+                        cursor.execute(sql, row)
+                        cursor.connection.commit()  # Commit each successful insert
+                        success_count += 1
+                    except Exception as row_error:
+                        cursor.connection.rollback()  # Rollback failed insert
+                        self.log_error(collection_name, 'row_insert', f"Row {row_idx}: {str(row_error)}")
+                        
+                        # Show detailed error only for the first few failures and only for targeted collections
+                        if collection_name in self.debug_collections and errors < 3:
+                            print(f"      Row {row_idx} failed: {row_error}")
+                            print(f"      Row data: {len(row)} values, expected {len(columns)} columns")
+                            
+                        errors += 1
+                        continue
+                return success_count, errors
+        
+        return 0, errors
+    
+    def _get_nested_value(self, doc: Dict, field_path: str):
+        """Get value from nested document using dot notation"""
+        keys = field_path.split('.')
+        value = doc
+        
+        for key in keys:
+            if isinstance(value, dict) and key in value:
+                value = value[key]
+            else:
+                return None
+                
+        return value
+    
+    def _convert_value_for_postgres(self, value, field_name: str = None, schema: Dict = None):
+        """Convert MongoDB value to PostgreSQL compatible value with schema-aware conversion"""
+        if value is None:
+            return None
+        
+        # Get the expected PostgreSQL type for this field if available
+        expected_type = None
+        if schema and field_name and field_name in schema:
+            field_info = schema[field_name]
+            expected_type = self._determine_postgres_type(field_info)
+        
+        # Handle conversion based on expected type
+        if expected_type == 'BOOLEAN':
+            if isinstance(value, bool):
+                return value
+            elif isinstance(value, str):
+                return value.lower() in ('true', '1', 'yes', 'on')
+            elif isinstance(value, (int, float)):
+                return bool(value)
+            else:
+                return None
+        elif expected_type == 'INTEGER':
+            if isinstance(value, int):
+                return value
+            elif isinstance(value, float):
+                return int(value)
+            elif isinstance(value, str) and value.isdigit():
+                return int(value)
+            elif isinstance(value, bool):
+                return int(value)
+            else:
+                return None
+        elif expected_type == 'NUMERIC':
+            if isinstance(value, (int, float)):
+                return value
+            elif isinstance(value, str):
+                try:
+                    return float(value)
+                except ValueError:
+                    return None
+            elif isinstance(value, bool):
+                return float(value)
+            else:
+                return None
+        elif expected_type == 'TEXT':
+            if isinstance(value, str):
+                return value
+            elif value is not None:
+                str_value = str(value)
+                # Handle very long strings
+                if len(str_value) > 65535:
+                    return str_value[:65535]
+                return str_value
+            else:
+                return None
+        elif expected_type == 'TIMESTAMP':
+            if hasattr(value, 'isoformat'):
+                return value.isoformat()
+            elif isinstance(value, str):
+                return value
+            else:
+                return str(value) if value is not None else None
+        elif expected_type == 'JSONB':
+            if isinstance(value, (dict, list)):
+                return json.dumps(value, default=self._json_serializer)
+            elif isinstance(value, str):
+                # Check if it's already valid JSON
+                try:
+                    json.loads(value)
+                    return value
+                except (json.JSONDecodeError, TypeError):
+                    # Not valid JSON, wrap it
+                    return json.dumps(value)
+            else:
+                return json.dumps(value, default=self._json_serializer)
+        
+        # Fallback to original logic if no expected type or type not recognized
+        if isinstance(value, bool):
+            return value
+        elif isinstance(value, (int, float)):
+            return value
+        elif isinstance(value, str):
+            return value
+        elif isinstance(value, (dict, list)):
+            return json.dumps(value, default=self._json_serializer)
+        elif hasattr(value, 'isoformat'):  # datetime
+            return value.isoformat()
+        elif hasattr(value, '__str__'):
+            str_value = str(value)
+            if len(str_value) > 65535:
+                return str_value[:65535]
+            return str_value
+        else:
+            return str(value)
+    
+    def _json_serializer(self, obj):
+        """Custom JSON serializer for complex objects with better error handling"""
+        try:
+            if hasattr(obj, 'isoformat'):  # datetime
+                return obj.isoformat()
+            elif hasattr(obj, '__str__'):
+                return str(obj)
+            else:
+                return None
+        except Exception as e:
+            self.log_debug(f"JSON serialization error: {e}")
+            return str(obj)
+    
+    def run_conversion(self, sample_size: int = 100, batch_size: int = 1000):
+        """Run the full conversion process with focused debugging"""
+        print("Starting MongoDB to PostgreSQL conversion...")
+        print("This will convert your Rocket.Chat database from MongoDB to PostgreSQL")
+        if self.debug_mode:
+            if self.debug_collections:
+                print(f"🐛 DEBUG MODE: Focusing on collections: {', '.join(self.debug_collections)}")
+            else:
+                print("🐛 DEBUG MODE: All collections")
+        print("=" * 70)
+        
+        # Step 1: Discover collections
+        self.discover_collections()
+        
+        # Step 2: Analyze schemas
+        print("\nAnalyzing collection schemas...")
+        for collection_name in self.collections:
+            self.analyze_schema(collection_name, sample_size)
+        
+        # Sample problematic collections if debugging
+        if self.debug_mode and self.debug_collections:
+            for coll in self.debug_collections:
+                if coll in self.collections:
+                    self.sample_documents(coll, 2)
+        
+        # Step 3: Generate PostgreSQL schema
+        table_definitions = self.generate_postgres_schema()
+        
+        # Step 4: Create database schema
+        self.create_postgres_database(table_definitions)
+        
+        # Step 5: Convert and insert data
+        self.convert_and_insert_data(batch_size)
+        
+        # Step 6: Show error summary
+        self._print_error_summary()
+        
+        print("=" * 70)
+        print("✅ Conversion completed!")
+        print(f"   Database: {self.postgres_config['database']}")
+        print(f"   Tables created: {len(table_definitions)}")
+        
+    def _print_error_summary(self):
+        """Print a focused summary of errors"""
+        if not self.error_log:
+            print("\n✅ No errors encountered during conversion!")
+            return
+            
+        print("\n⚠️  ERROR SUMMARY:")
+        print("=" * 50)
+        
+        # Sort by error count descending
+        sorted_collections = sorted(self.error_log.items(), 
+                                  key=lambda x: len(x[1]), reverse=True)
+        
+        for collection, errors in sorted_collections:
+            error_types = {}
+            for error in errors:
+                error_type = error['type']
+                if error_type not in error_types:
+                    error_types[error_type] = []
+                error_types[error_type].append(error['details'])
+            
+            print(f"\n🔴 {collection} ({len(errors)} total errors):")
+            for error_type, details_list in error_types.items():
+                print(f"   {error_type}: {len(details_list)} errors")
+                
+                # Show sample errors for critical collections
+                if collection in ['rocketchat_settings', 'rocketchat_room'] and len(details_list) > 0:
+                    print(f"     Sample: {details_list[0][:100]}...")
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Convert MongoDB BSON export to PostgreSQL',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Basic usage
+  python3 mongo_to_postgres_converter.py \\
+    --mongo-path db/database/62df06d44234d20001289144 \\
+    --pg-database rocketchat_converted \\
+    --pg-user rocketchat_user \\
+    --pg-password mypassword
+
+  # Debug specific failing collections
+  python3 mongo_to_postgres_converter.py \\
+    --mongo-path db/database/62df06d44234d20001289144 \\
+    --pg-database rocketchat_converted \\
+    --pg-user rocketchat_user \\
+    --pg-password mypassword \\
+    --debug-collections rocketchat_settings rocketchat_room
+
+Before running this script:
+1. Run: sudo -u postgres psql -f reset_database.sql
+2. Update the password in reset_database.sql
+        """
+    )
+    
+    parser.add_argument('--mongo-path', required=True, help='Path to MongoDB export directory')
+    parser.add_argument('--pg-host', default='localhost', help='PostgreSQL host (default: localhost)')
+    parser.add_argument('--pg-port', default='5432', help='PostgreSQL port (default: 5432)')
+    parser.add_argument('--pg-database', required=True, help='PostgreSQL database name')
+    parser.add_argument('--pg-user', required=True, help='PostgreSQL username')
+    parser.add_argument('--pg-password', required=True, help='PostgreSQL password')
+    parser.add_argument('--sample-size', type=int, default=100, help='Number of documents to sample for schema analysis (default: 100)')
+    parser.add_argument('--batch-size', type=int, default=1000, help='Batch size for data insertion (default: 1000)')
+    parser.add_argument('--debug', action='store_true', help='Enable debug mode with detailed error logging')
+    parser.add_argument('--debug-collections', nargs='*', help='Specific collections to debug (e.g., rocketchat_settings rocketchat_room)')
+    
+    args = parser.parse_args()
+    
+    postgres_config = {
+        'host': args.pg_host,
+        'port': args.pg_port,
+        'database': args.pg_database,
+        'user': args.pg_user,
+        'password': args.pg_password
+    }
+    
+    # Enable debug mode if debug collections are specified
+    debug_mode = args.debug or (args.debug_collections is not None)
+    
+    converter = MongoToPostgresConverter(args.mongo_path, postgres_config, debug_mode, args.debug_collections)
+    converter.run_conversion(args.sample_size, args.batch_size)
+
+
+if __name__ == '__main__':
+    main() 
--- a/inventory-server/chat/db-convert/reset_database.sql
+++ b/inventory-server/chat/db-convert/reset_database.sql
@@ -0,0 +1,41 @@
+-- PostgreSQL Database Reset Script for Rocket.Chat Import
+-- Run as: sudo -u postgres psql -f reset_database.sql
+
+-- Terminate all connections to the database (force disconnect users)
+SELECT pg_terminate_backend(pid)
+FROM pg_stat_activity
+WHERE datname = 'rocketchat_converted' AND pid <> pg_backend_pid();
+
+-- Drop the database if it exists
+DROP DATABASE IF EXISTS rocketchat_converted;
+
+-- Create fresh database
+CREATE DATABASE rocketchat_converted;
+
+-- Create user (if not exists)
+DO $$ 
+BEGIN
+    IF NOT EXISTS (SELECT FROM pg_user WHERE usename = 'rocketchat_user') THEN
+        CREATE USER rocketchat_user WITH PASSWORD 'HKjLgt23gWuPXzEAn3rW';
+    END IF;
+END $$;
+
+-- Grant database privileges
+GRANT CONNECT ON DATABASE rocketchat_converted TO rocketchat_user;
+GRANT CREATE ON DATABASE rocketchat_converted TO rocketchat_user;
+
+-- Connect to the new database
+\c rocketchat_converted;
+
+-- Grant schema privileges
+GRANT CREATE ON SCHEMA public TO rocketchat_user;
+GRANT USAGE ON SCHEMA public TO rocketchat_user;
+
+-- Grant privileges on all future tables and sequences
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO rocketchat_user;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT USAGE, SELECT ON SEQUENCES TO rocketchat_user;
+
+-- Display success message
+\echo 'Database reset completed successfully!'
+\echo 'You can now run the converter with:'
+\echo 'python3 mongo_to_postgres_converter.py --mongo-path db/database/62df06d44234d20001289144 --pg-database rocketchat_converted --pg-user rocketchat_user --pg-password your_password' 
--- a/inventory-server/chat/db-convert/test_converter.py
+++ b/inventory-server/chat/db-convert/test_converter.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""
+Quick test script to verify the converter fixes work for problematic collections
+"""
+
+from mongo_to_postgres_converter import MongoToPostgresConverter
+
+def test_problematic_collections():
+    print("🧪 Testing converter fixes for problematic collections...")
+    
+    postgres_config = {
+        'host': 'localhost',
+        'port': '5432',
+        'database': 'rocketchat_test',
+        'user': 'rocketchat_user',
+        'password': 'password123'
+    }
+    
+    converter = MongoToPostgresConverter(
+        'db/database/62df06d44234d20001289144',
+        postgres_config,
+        debug_mode=True,
+        debug_collections=['rocketchat_settings', 'rocketchat_room']
+    )
+    
+    # Test just discovery and schema analysis
+    print("\n1. Testing collection discovery...")
+    converter.discover_collections()
+    
+    print("\n2. Testing schema analysis...")
+    if 'rocketchat_settings' in converter.collections:
+        settings_schema = converter.analyze_schema('rocketchat_settings', 10)
+        print(f"Settings schema fields: {len(settings_schema)}")
+        
+        # Check specific problematic fields
+        if 'packageValue' in settings_schema:
+            packagevalue_info = settings_schema['packageValue']
+            pg_type = converter._determine_postgres_type(packagevalue_info)
+            print(f"packageValue types: {packagevalue_info['types']} -> PostgreSQL: {pg_type}")
+            
+    if 'rocketchat_room' in converter.collections:
+        room_schema = converter.analyze_schema('rocketchat_room', 10)
+        print(f"Room schema fields: {len(room_schema)}")
+        
+        # Check specific problematic fields
+        if 'sysMes' in room_schema:
+            sysmes_info = room_schema['sysMes']
+            pg_type = converter._determine_postgres_type(sysmes_info)
+            print(f"sysMes types: {sysmes_info['types']} -> PostgreSQL: {pg_type}")
+    
+    print("\n✅ Test completed - check the type mappings above!")
+
+if __name__ == '__main__':
+    test_problematic_collections() 
--- a/inventory-server/chat/package-lock.json
+++ b/inventory-server/chat/package-lock.json
--- a/inventory-server/chat/package.json
+++ b/inventory-server/chat/package.json
@@ -0,0 +1,20 @@
+{
+  "name": "chat-server",
+  "version": "1.0.0",
+  "description": "Chat archive server for Rocket.Chat data",
+  "main": "server.js",
+  "scripts": {
+    "start": "node server.js",
+    "dev": "nodemon server.js"
+  },
+  "dependencies": {
+    "express": "^4.18.2",
+    "cors": "^2.8.5",
+    "pg": "^8.11.0",
+    "dotenv": "^16.0.3",
+    "morgan": "^1.10.0"
+  },
+  "devDependencies": {
+    "nodemon": "^2.0.22"
+  }
+} 
--- a/inventory-server/chat/routes.js
+++ b/inventory-server/chat/routes.js
@@ -0,0 +1,87 @@
+const express = require('express');
+const router = express.Router();
+
+// Get all active users for the "view as" dropdown
+router.get('/users', async (req, res) => {
+  try {
+    const result = await global.pool.query(`
+      SELECT id, username, name, type, active
+      FROM users 
+      WHERE active = true AND type = 'user'
+      ORDER BY name ASC
+    `);
+    
+    res.json({
+      status: 'success',
+      users: result.rows
+    });
+  } catch (error) {
+    console.error('Error fetching users:', error);
+    res.status(500).json({ 
+      status: 'error',
+      error: 'Failed to fetch users',
+      details: error.message
+    });
+  }
+});
+
+// Get rooms for a specific user
+router.get('/users/:userId/rooms', async (req, res) => {
+  const { userId } = req.params;
+  
+  try {
+    // Get rooms where the user is a member
+    const result = await global.pool.query(`
+      SELECT DISTINCT r.id, r.name, r.fname, r.t as type, r.msgs, r.lm as last_message_date
+      FROM room r, subscription s
+      WHERE s.rid = r.mongo_id 
+      AND s.u->>'_id' = (SELECT mongo_id FROM users WHERE id = $1)
+      AND r.archived IS NOT TRUE
+      ORDER BY r.lm DESC NULLS LAST
+      LIMIT 50
+    `, [userId]);
+    
+    res.json({
+      status: 'success',
+      rooms: result.rows
+    });
+  } catch (error) {
+    console.error('Error fetching user rooms:', error);
+    res.status(500).json({ 
+      status: 'error',
+      error: 'Failed to fetch user rooms',
+      details: error.message
+    });
+  }
+});
+
+// Get messages for a specific room
+router.get('/rooms/:roomId/messages', async (req, res) => {
+  const { roomId } = req.params;
+  const { limit = 50, offset = 0 } = req.query;
+  
+  try {
+    const result = await global.pool.query(`
+      SELECT m.id, m.msg, m.ts, m.u, m._updatedat
+      FROM message m
+      JOIN room r ON m.rid = r.mongo_id
+      WHERE r.id = $1
+      ORDER BY m.ts DESC
+      LIMIT $2 OFFSET $3
+    `, [roomId, limit, offset]);
+    
+    res.json({
+      status: 'success',
+      messages: result.rows
+    });
+  } catch (error) {
+    console.error('Error fetching messages:', error);
+    res.status(500).json({ 
+      status: 'error',
+      error: 'Failed to fetch messages',
+      details: error.message
+    });
+  }
+});
+
+module.exports = router; 
--- a/inventory-server/chat/server.js
+++ b/inventory-server/chat/server.js
@@ -0,0 +1,83 @@
+require('dotenv').config({ path: '../.env' });
+const express = require('express');
+const cors = require('cors');
+const { Pool } = require('pg');
+const morgan = require('morgan');
+const chatRoutes = require('./routes');
+
+// Log startup configuration
+console.log('Starting chat server with config:', {
+  host: process.env.CHAT_DB_HOST,
+  user: process.env.CHAT_DB_USER,
+  database: process.env.CHAT_DB_NAME || 'rocketchat_converted',
+  port: process.env.CHAT_DB_PORT,
+  chat_port: process.env.CHAT_PORT || 3014
+});
+
+const app = express();
+const port = process.env.CHAT_PORT || 3014;
+
+// Database configuration for rocketchat_converted database
+const pool = new Pool({
+  host: process.env.CHAT_DB_HOST,
+  user: process.env.CHAT_DB_USER,
+  password: process.env.CHAT_DB_PASSWORD,
+  database: process.env.CHAT_DB_NAME || 'rocketchat_converted',
+  port: process.env.CHAT_DB_PORT,
+});
+
+// Make pool available globally
+global.pool = pool;
+
+// Middleware
+app.use(express.json());
+app.use(morgan('combined'));
+app.use(cors({
+  origin: ['http://localhost:5175', 'http://localhost:5174', 'https://inventory.kent.pw'],
+  credentials: true
+}));
+
+// Test database connection endpoint
+app.get('/test-db', async (req, res) => {
+  try {
+    const result = await pool.query('SELECT COUNT(*) as user_count FROM users WHERE active = true');
+    const messageResult = await pool.query('SELECT COUNT(*) as message_count FROM message');
+    const roomResult = await pool.query('SELECT COUNT(*) as room_count FROM room');
+    
+    res.json({
+      status: 'success',
+      database: 'rocketchat_converted',
+      stats: {
+        active_users: parseInt(result.rows[0].user_count),
+        total_messages: parseInt(messageResult.rows[0].message_count),
+        total_rooms: parseInt(roomResult.rows[0].room_count)
+      }
+    });
+  } catch (error) {
+    console.error('Database test error:', error);
+    res.status(500).json({ 
+      status: 'error',
+      error: 'Database connection failed',
+      details: error.message
+    });
+  }
+});
+
+// Mount all routes from routes.js
+app.use('/', chatRoutes);
+
+// Health check endpoint
+app.get('/health', (req, res) => {
+  res.json({ status: 'healthy' });
+});
+
+// Error handling middleware
+app.use((err, req, res, next) => {
+  console.error(err.stack);
+  res.status(500).json({ error: 'Something broke!' });
+});
+
+// Start server
+app.listen(port, () => {
+  console.log(`Chat server running on port ${port}`);
+});