Add chat page and chat server

This commit is contained in:
2025-06-14 13:36:31 -04:00
parent 00501704df
commit b2330dee22
17 changed files with 3039 additions and 192 deletions

View File

@@ -0,0 +1,881 @@
#!/usr/bin/env python3
"""
MongoDB to PostgreSQL Converter for Rocket.Chat
Converts MongoDB BSON export files to PostgreSQL database
Usage:
python3 mongo_to_postgres_converter.py \
--mongo-path db/database/62df06d44234d20001289144 \
--pg-database rocketchat_converted \
--pg-user rocketchat_user \
--pg-password your_password \
--debug
"""
import json
import os
import re
import subprocess
import sys
import struct
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, List, Optional
import argparse
import traceback
# Auto-install dependencies if needed
try:
import bson
import psycopg2
except ImportError:
print("Installing required packages...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "pymongo", "psycopg2-binary"])
import bson
import psycopg2
class MongoToPostgresConverter:
def __init__(self, mongo_db_path: str, postgres_config: Dict[str, str], debug_mode: bool = False, debug_collections: List[str] = None):
self.mongo_db_path = Path(mongo_db_path)
self.postgres_config = postgres_config
self.debug_mode = debug_mode
self.debug_collections = debug_collections or []
self.collections = {}
self.schema_info = {}
self.error_log = {}
def log_debug(self, message: str, collection: str = None):
"""Log debug messages if debug mode is enabled and collection is in debug list"""
if self.debug_mode and (not self.debug_collections or collection in self.debug_collections):
print(f"DEBUG: {message}")
def log_error(self, collection: str, error_type: str, details: str):
"""Log detailed error information"""
if collection not in self.error_log:
self.error_log[collection] = []
self.error_log[collection].append({
'type': error_type,
'details': details,
'timestamp': datetime.now().isoformat()
})
def sample_documents(self, collection_name: str, max_samples: int = 3) -> List[Dict]:
"""Sample documents from a collection for debugging"""
if not self.debug_mode or (self.debug_collections and collection_name not in self.debug_collections):
return []
print(f"\n🔍 Sampling documents from {collection_name}:")
bson_file = self.collections[collection_name]['bson_file']
if bson_file.stat().st_size == 0:
print(" Collection is empty")
return []
samples = []
try:
with open(bson_file, 'rb') as f:
sample_count = 0
while sample_count < max_samples:
try:
doc_size = int.from_bytes(f.read(4), byteorder='little')
if doc_size <= 0:
break
f.seek(-4, 1)
doc_bytes = f.read(doc_size)
if len(doc_bytes) != doc_size:
break
doc = bson.decode(doc_bytes)
samples.append(doc)
sample_count += 1
print(f" Sample {sample_count} - Keys: {list(doc.keys())}")
# Show a few key fields with their types and truncated values
for key, value in list(doc.items())[:3]:
value_preview = str(value)[:50] + "..." if len(str(value)) > 50 else str(value)
print(f" {key}: {type(value).__name__} = {value_preview}")
if len(doc) > 3:
print(f" ... and {len(doc) - 3} more fields")
print()
except (bson.InvalidBSON, struct.error, OSError) as e:
self.log_error(collection_name, 'document_parsing', str(e))
break
except Exception as e:
self.log_error(collection_name, 'file_reading', str(e))
print(f" Error reading collection: {e}")
return samples
def discover_collections(self):
"""Discover all BSON files and their metadata"""
print("Discovering MongoDB collections...")
for bson_file in self.mongo_db_path.glob("*.bson"):
collection_name = bson_file.stem
metadata_file = bson_file.with_suffix(".metadata.json")
# Read metadata if available
metadata = {}
if metadata_file.exists():
try:
with open(metadata_file, 'r', encoding='utf-8') as f:
metadata = json.load(f)
except (UnicodeDecodeError, json.JSONDecodeError) as e:
print(f"Warning: Could not read metadata for {collection_name}: {e}")
metadata = {}
# Get file size and document count estimate
file_size = bson_file.stat().st_size
doc_count = self._estimate_document_count(bson_file)
self.collections[collection_name] = {
'bson_file': bson_file,
'metadata': metadata,
'file_size': file_size,
'estimated_docs': doc_count
}
print(f"Found {len(self.collections)} collections")
for name, info in self.collections.items():
print(f" - {name}: {info['file_size']/1024/1024:.1f}MB (~{info['estimated_docs']} docs)")
def _estimate_document_count(self, bson_file: Path) -> int:
"""Estimate document count by reading first few documents"""
if bson_file.stat().st_size == 0:
return 0
try:
with open(bson_file, 'rb') as f:
docs_sampled = 0
bytes_sampled = 0
max_sample_size = min(1024 * 1024, bson_file.stat().st_size) # 1MB or file size
while bytes_sampled < max_sample_size:
try:
doc_size = int.from_bytes(f.read(4), byteorder='little')
if doc_size <= 0 or doc_size > 16 * 1024 * 1024: # MongoDB doc size limit
break
f.seek(-4, 1) # Go back
doc_bytes = f.read(doc_size)
if len(doc_bytes) != doc_size:
break
bson.decode(doc_bytes) # Validate it's a valid BSON document
docs_sampled += 1
bytes_sampled += doc_size
except (bson.InvalidBSON, struct.error, OSError):
break
if docs_sampled > 0 and bytes_sampled > 0:
avg_doc_size = bytes_sampled / docs_sampled
return int(bson_file.stat().st_size / avg_doc_size)
except Exception:
pass
return 0
def analyze_schema(self, collection_name: str, sample_size: int = 100) -> Dict[str, Any]:
"""Analyze collection schema by sampling documents"""
print(f"Analyzing schema for {collection_name}...")
bson_file = self.collections[collection_name]['bson_file']
if bson_file.stat().st_size == 0:
return {}
schema = {}
docs_analyzed = 0
try:
with open(bson_file, 'rb') as f:
while docs_analyzed < sample_size:
try:
doc_size = int.from_bytes(f.read(4), byteorder='little')
if doc_size <= 0:
break
f.seek(-4, 1)
doc_bytes = f.read(doc_size)
if len(doc_bytes) != doc_size:
break
doc = bson.decode(doc_bytes)
self._analyze_document_schema(doc, schema)
docs_analyzed += 1
except (bson.InvalidBSON, struct.error, OSError):
break
except Exception as e:
print(f"Error analyzing {collection_name}: {e}")
self.schema_info[collection_name] = schema
return schema
def _analyze_document_schema(self, doc: Dict[str, Any], schema: Dict[str, Any], prefix: str = ""):
"""Recursively analyze document structure"""
for key, value in doc.items():
full_key = f"{prefix}.{key}" if prefix else key
if full_key not in schema:
schema[full_key] = {
'types': set(),
'null_count': 0,
'total_count': 0,
'is_array': False,
'nested_schema': {}
}
schema[full_key]['total_count'] += 1
if value is None:
schema[full_key]['null_count'] += 1
schema[full_key]['types'].add('null')
elif isinstance(value, dict):
schema[full_key]['types'].add('object')
if 'nested_schema' not in schema[full_key]:
schema[full_key]['nested_schema'] = {}
self._analyze_document_schema(value, schema[full_key]['nested_schema'])
elif isinstance(value, list):
schema[full_key]['types'].add('array')
schema[full_key]['is_array'] = True
if value and isinstance(value[0], dict):
if 'array_item_schema' not in schema[full_key]:
schema[full_key]['array_item_schema'] = {}
for item in value[:5]: # Sample first 5 items
if isinstance(item, dict):
self._analyze_document_schema(item, schema[full_key]['array_item_schema'])
else:
schema[full_key]['types'].add(type(value).__name__)
def generate_postgres_schema(self) -> Dict[str, str]:
"""Generate PostgreSQL CREATE TABLE statements"""
print("Generating PostgreSQL schema...")
table_definitions = {}
for collection_name, schema in self.schema_info.items():
if not schema: # Empty collection
continue
table_name = self._sanitize_table_name(collection_name)
columns = []
# Always add an id column (PostgreSQL doesn't use _id like MongoDB)
columns.append("id SERIAL PRIMARY KEY")
for field_name, field_info in schema.items():
if field_name == '_id':
columns.append("mongo_id TEXT") # Always allow NULL for mongo_id
continue
col_name = self._sanitize_column_name(field_name)
# Handle conflicts with PostgreSQL auto-generated columns
if col_name in ['id', 'mongo_id', 'created_at', 'updated_at']:
col_name = f"field_{col_name}"
col_type = self._determine_postgres_type(field_info)
# Make all fields nullable by default to avoid constraint violations
columns.append(f"{col_name} {col_type}")
# Add metadata columns
columns.extend([
"created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP",
"updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP"
])
column_definitions = ',\n '.join(columns)
table_sql = f"""
CREATE TABLE IF NOT EXISTS {table_name} (
{column_definitions}
);
-- Create indexes based on MongoDB indexes
"""
# Get list of actual columns that will exist in the table
existing_columns = set(['id', 'mongo_id', 'created_at', 'updated_at'])
for field_name in schema.keys():
if field_name != '_id':
col_name = self._sanitize_column_name(field_name)
# Handle conflicts with PostgreSQL auto-generated columns
if col_name in ['id', 'mongo_id', 'created_at', 'updated_at']:
col_name = f"field_{col_name}"
existing_columns.add(col_name)
# Add indexes from MongoDB metadata
metadata = self.collections[collection_name].get('metadata', {})
indexes = metadata.get('indexes', [])
for index in indexes:
if index['name'] != '_id_': # Skip the default _id index
# Sanitize index name - remove special characters
sanitized_index_name = re.sub(r'[^a-zA-Z0-9_]', '_', index['name'])
index_name = f"idx_{table_name}_{sanitized_index_name}"
index_keys = list(index['key'].keys())
if index_keys:
sanitized_keys = []
for key in index_keys:
if key != '_id':
sanitized_key = self._sanitize_column_name(key)
# Handle conflicts with PostgreSQL auto-generated columns
if sanitized_key in ['id', 'mongo_id', 'created_at', 'updated_at']:
sanitized_key = f"field_{sanitized_key}"
# Only add if the column actually exists in our table
if sanitized_key in existing_columns:
sanitized_keys.append(sanitized_key)
if sanitized_keys:
table_sql += f"CREATE INDEX IF NOT EXISTS {index_name} ON {table_name} ({', '.join(sanitized_keys)});\n"
table_definitions[collection_name] = table_sql
return table_definitions
def _sanitize_table_name(self, name: str) -> str:
"""Convert MongoDB collection name to PostgreSQL table name"""
# Remove rocketchat_ prefix if present
if name.startswith('rocketchat_'):
name = name[11:]
# Replace special characters with underscores
name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
# Ensure it starts with a letter
if name and name[0].isdigit():
name = 'table_' + name
return name.lower()
def _sanitize_column_name(self, name: str) -> str:
"""Convert MongoDB field name to PostgreSQL column name"""
# Handle nested field names (convert dots to underscores)
name = name.replace('.', '_')
# Replace special characters with underscores
name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
# Ensure it starts with a letter or underscore
if name and name[0].isdigit():
name = 'col_' + name
# Handle PostgreSQL reserved words
reserved = {
'user', 'order', 'group', 'table', 'index', 'key', 'value', 'date', 'time', 'timestamp',
'default', 'select', 'from', 'where', 'insert', 'update', 'delete', 'create', 'drop',
'alter', 'grant', 'revoke', 'commit', 'rollback', 'begin', 'end', 'case', 'when',
'then', 'else', 'if', 'null', 'not', 'and', 'or', 'in', 'exists', 'between',
'like', 'limit', 'offset', 'union', 'join', 'inner', 'outer', 'left', 'right',
'full', 'cross', 'natural', 'on', 'using', 'distinct', 'all', 'any', 'some',
'desc', 'asc', 'primary', 'foreign', 'references', 'constraint', 'unique',
'check', 'cascade', 'restrict', 'action', 'match', 'partial', 'full'
}
if name.lower() in reserved:
name = name + '_col'
return name.lower()
def _determine_postgres_type(self, field_info: Dict[str, Any]) -> str:
"""Determine PostgreSQL column type from MongoDB field analysis with improved logic"""
types = field_info['types']
# Convert set to list for easier checking
type_list = list(types)
# If there's only one type (excluding null), use specific typing
non_null_types = [t for t in type_list if t != 'null']
if len(non_null_types) == 1:
single_type = non_null_types[0]
if single_type == 'bool':
return 'BOOLEAN'
elif single_type == 'int':
return 'INTEGER'
elif single_type == 'float':
return 'NUMERIC'
elif single_type == 'str':
return 'TEXT'
elif single_type == 'datetime':
return 'TIMESTAMP'
elif single_type == 'ObjectId':
return 'TEXT'
# Handle mixed types more conservatively
if 'array' in types or field_info.get('is_array', False):
return 'JSONB' # Arrays always go to JSONB
elif 'object' in types:
return 'JSONB' # Objects always go to JSONB
elif len(non_null_types) > 1:
# Multiple non-null types - check for common combinations
if set(non_null_types) <= {'int', 'float'}:
return 'NUMERIC' # Can handle both int and float
elif set(non_null_types) <= {'bool', 'str'}:
return 'TEXT' # Convert everything to text
elif set(non_null_types) <= {'str', 'ObjectId'}:
return 'TEXT' # Both are string-like
else:
return 'JSONB' # Complex mixed types go to JSONB
elif 'ObjectId' in types:
return 'TEXT'
elif 'datetime' in types:
return 'TIMESTAMP'
elif 'bool' in types:
return 'BOOLEAN'
elif 'int' in types:
return 'INTEGER'
elif 'float' in types:
return 'NUMERIC'
elif 'str' in types:
return 'TEXT'
else:
return 'TEXT' # Default fallback
def create_postgres_database(self, table_definitions: Dict[str, str]):
"""Create PostgreSQL database and tables"""
print("Creating PostgreSQL database schema...")
try:
# Connect to PostgreSQL
conn = psycopg2.connect(**self.postgres_config)
conn.autocommit = True
cursor = conn.cursor()
# Create tables
for collection_name, table_sql in table_definitions.items():
print(f"Creating table for {collection_name}...")
cursor.execute(table_sql)
cursor.close()
conn.close()
print("Database schema created successfully!")
except Exception as e:
print(f"Error creating database schema: {e}")
raise
def convert_and_insert_data(self, batch_size: int = 1000):
"""Convert BSON data and insert into PostgreSQL"""
print("Converting and inserting data...")
try:
conn = psycopg2.connect(**self.postgres_config)
conn.autocommit = False
for collection_name in self.collections:
print(f"Processing {collection_name}...")
self._convert_collection(conn, collection_name, batch_size)
conn.close()
print("Data conversion completed successfully!")
except Exception as e:
print(f"Error converting data: {e}")
raise
def _convert_collection(self, conn, collection_name: str, batch_size: int):
"""Convert a single collection"""
bson_file = self.collections[collection_name]['bson_file']
if bson_file.stat().st_size == 0:
print(f" Skipping empty collection {collection_name}")
return
table_name = self._sanitize_table_name(collection_name)
cursor = conn.cursor()
batch = []
total_inserted = 0
errors = 0
try:
with open(bson_file, 'rb') as f:
while True:
try:
doc_size = int.from_bytes(f.read(4), byteorder='little')
if doc_size <= 0:
break
f.seek(-4, 1)
doc_bytes = f.read(doc_size)
if len(doc_bytes) != doc_size:
break
doc = bson.decode(doc_bytes)
batch.append(doc)
if len(batch) >= batch_size:
inserted, batch_errors = self._insert_batch(cursor, table_name, batch, collection_name)
total_inserted += inserted
errors += batch_errors
batch = []
conn.commit()
if total_inserted % 5000 == 0: # Less frequent progress updates
print(f" Inserted {total_inserted} documents...")
except (bson.InvalidBSON, struct.error, OSError):
break
# Insert remaining documents
if batch:
inserted, batch_errors = self._insert_batch(cursor, table_name, batch, collection_name)
total_inserted += inserted
errors += batch_errors
conn.commit()
if errors > 0:
print(f" Completed {collection_name}: {total_inserted} documents inserted ({errors} errors)")
else:
print(f" Completed {collection_name}: {total_inserted} documents inserted")
except Exception as e:
print(f" Error processing {collection_name}: {e}")
conn.rollback()
finally:
cursor.close()
def _insert_batch(self, cursor, table_name: str, documents: List[Dict], collection_name: str):
"""Insert a batch of documents with proper transaction handling"""
if not documents:
return 0, 0
# Get schema info for this collection
schema = self.schema_info.get(collection_name, {})
# Build column list
columns = ['mongo_id']
for field_name in schema.keys():
if field_name != '_id':
col_name = self._sanitize_column_name(field_name)
# Handle conflicts with PostgreSQL auto-generated columns
if col_name in ['id', 'mongo_id', 'created_at', 'updated_at']:
col_name = f"field_{col_name}"
columns.append(col_name)
# Build INSERT statement
placeholders = ', '.join(['%s'] * len(columns))
sql = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
self.log_debug(f"SQL: {sql}", collection_name)
# Convert documents to tuples
rows = []
errors = 0
for doc_idx, doc in enumerate(documents):
try:
row = []
# Add mongo_id
row.append(str(doc.get('_id', '')))
# Add other fields
for field_name in schema.keys():
if field_name != '_id':
try:
value = self._get_nested_value(doc, field_name)
converted_value = self._convert_value_for_postgres(value, field_name, schema)
row.append(converted_value)
except Exception as e:
self.log_error(collection_name, 'field_conversion',
f"Field '{field_name}' in doc {doc_idx}: {str(e)}")
# Only show debug for collections we're focusing on
if collection_name in self.debug_collections:
print(f" ⚠️ Error converting field '{field_name}': {e}")
row.append(None) # Use NULL for problematic fields
rows.append(tuple(row))
except Exception as e:
self.log_error(collection_name, 'document_conversion', f"Document {doc_idx}: {str(e)}")
errors += 1
continue
# Execute batch insert
if rows:
try:
cursor.executemany(sql, rows)
return len(rows), errors
except Exception as batch_error:
self.log_error(collection_name, 'batch_insert', str(batch_error))
# Only show detailed debugging for targeted collections
if collection_name in self.debug_collections:
print(f" 🔴 Batch insert failed for {collection_name}: {batch_error}")
print(" Trying individual inserts with rollback handling...")
# Rollback the failed transaction
cursor.connection.rollback()
# Try inserting one by one in individual transactions
success_count = 0
for row_idx, row in enumerate(rows):
try:
cursor.execute(sql, row)
cursor.connection.commit() # Commit each successful insert
success_count += 1
except Exception as row_error:
cursor.connection.rollback() # Rollback failed insert
self.log_error(collection_name, 'row_insert', f"Row {row_idx}: {str(row_error)}")
# Show detailed error only for the first few failures and only for targeted collections
if collection_name in self.debug_collections and errors < 3:
print(f" Row {row_idx} failed: {row_error}")
print(f" Row data: {len(row)} values, expected {len(columns)} columns")
errors += 1
continue
return success_count, errors
return 0, errors
def _get_nested_value(self, doc: Dict, field_path: str):
"""Get value from nested document using dot notation"""
keys = field_path.split('.')
value = doc
for key in keys:
if isinstance(value, dict) and key in value:
value = value[key]
else:
return None
return value
def _convert_value_for_postgres(self, value, field_name: str = None, schema: Dict = None):
"""Convert MongoDB value to PostgreSQL compatible value with schema-aware conversion"""
if value is None:
return None
# Get the expected PostgreSQL type for this field if available
expected_type = None
if schema and field_name and field_name in schema:
field_info = schema[field_name]
expected_type = self._determine_postgres_type(field_info)
# Handle conversion based on expected type
if expected_type == 'BOOLEAN':
if isinstance(value, bool):
return value
elif isinstance(value, str):
return value.lower() in ('true', '1', 'yes', 'on')
elif isinstance(value, (int, float)):
return bool(value)
else:
return None
elif expected_type == 'INTEGER':
if isinstance(value, int):
return value
elif isinstance(value, float):
return int(value)
elif isinstance(value, str) and value.isdigit():
return int(value)
elif isinstance(value, bool):
return int(value)
else:
return None
elif expected_type == 'NUMERIC':
if isinstance(value, (int, float)):
return value
elif isinstance(value, str):
try:
return float(value)
except ValueError:
return None
elif isinstance(value, bool):
return float(value)
else:
return None
elif expected_type == 'TEXT':
if isinstance(value, str):
return value
elif value is not None:
str_value = str(value)
# Handle very long strings
if len(str_value) > 65535:
return str_value[:65535]
return str_value
else:
return None
elif expected_type == 'TIMESTAMP':
if hasattr(value, 'isoformat'):
return value.isoformat()
elif isinstance(value, str):
return value
else:
return str(value) if value is not None else None
elif expected_type == 'JSONB':
if isinstance(value, (dict, list)):
return json.dumps(value, default=self._json_serializer)
elif isinstance(value, str):
# Check if it's already valid JSON
try:
json.loads(value)
return value
except (json.JSONDecodeError, TypeError):
# Not valid JSON, wrap it
return json.dumps(value)
else:
return json.dumps(value, default=self._json_serializer)
# Fallback to original logic if no expected type or type not recognized
if isinstance(value, bool):
return value
elif isinstance(value, (int, float)):
return value
elif isinstance(value, str):
return value
elif isinstance(value, (dict, list)):
return json.dumps(value, default=self._json_serializer)
elif hasattr(value, 'isoformat'): # datetime
return value.isoformat()
elif hasattr(value, '__str__'):
str_value = str(value)
if len(str_value) > 65535:
return str_value[:65535]
return str_value
else:
return str(value)
def _json_serializer(self, obj):
"""Custom JSON serializer for complex objects with better error handling"""
try:
if hasattr(obj, 'isoformat'): # datetime
return obj.isoformat()
elif hasattr(obj, '__str__'):
return str(obj)
else:
return None
except Exception as e:
self.log_debug(f"JSON serialization error: {e}")
return str(obj)
def run_conversion(self, sample_size: int = 100, batch_size: int = 1000):
"""Run the full conversion process with focused debugging"""
print("Starting MongoDB to PostgreSQL conversion...")
print("This will convert your Rocket.Chat database from MongoDB to PostgreSQL")
if self.debug_mode:
if self.debug_collections:
print(f"🐛 DEBUG MODE: Focusing on collections: {', '.join(self.debug_collections)}")
else:
print("🐛 DEBUG MODE: All collections")
print("=" * 70)
# Step 1: Discover collections
self.discover_collections()
# Step 2: Analyze schemas
print("\nAnalyzing collection schemas...")
for collection_name in self.collections:
self.analyze_schema(collection_name, sample_size)
# Sample problematic collections if debugging
if self.debug_mode and self.debug_collections:
for coll in self.debug_collections:
if coll in self.collections:
self.sample_documents(coll, 2)
# Step 3: Generate PostgreSQL schema
table_definitions = self.generate_postgres_schema()
# Step 4: Create database schema
self.create_postgres_database(table_definitions)
# Step 5: Convert and insert data
self.convert_and_insert_data(batch_size)
# Step 6: Show error summary
self._print_error_summary()
print("=" * 70)
print("✅ Conversion completed!")
print(f" Database: {self.postgres_config['database']}")
print(f" Tables created: {len(table_definitions)}")
def _print_error_summary(self):
"""Print a focused summary of errors"""
if not self.error_log:
print("\n✅ No errors encountered during conversion!")
return
print("\n⚠️ ERROR SUMMARY:")
print("=" * 50)
# Sort by error count descending
sorted_collections = sorted(self.error_log.items(),
key=lambda x: len(x[1]), reverse=True)
for collection, errors in sorted_collections:
error_types = {}
for error in errors:
error_type = error['type']
if error_type not in error_types:
error_types[error_type] = []
error_types[error_type].append(error['details'])
print(f"\n🔴 {collection} ({len(errors)} total errors):")
for error_type, details_list in error_types.items():
print(f" {error_type}: {len(details_list)} errors")
# Show sample errors for critical collections
if collection in ['rocketchat_settings', 'rocketchat_room'] and len(details_list) > 0:
print(f" Sample: {details_list[0][:100]}...")
def main():
parser = argparse.ArgumentParser(
description='Convert MongoDB BSON export to PostgreSQL',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Basic usage
python3 mongo_to_postgres_converter.py \\
--mongo-path db/database/62df06d44234d20001289144 \\
--pg-database rocketchat_converted \\
--pg-user rocketchat_user \\
--pg-password mypassword
# Debug specific failing collections
python3 mongo_to_postgres_converter.py \\
--mongo-path db/database/62df06d44234d20001289144 \\
--pg-database rocketchat_converted \\
--pg-user rocketchat_user \\
--pg-password mypassword \\
--debug-collections rocketchat_settings rocketchat_room
Before running this script:
1. Run: sudo -u postgres psql -f reset_database.sql
2. Update the password in reset_database.sql
"""
)
parser.add_argument('--mongo-path', required=True, help='Path to MongoDB export directory')
parser.add_argument('--pg-host', default='localhost', help='PostgreSQL host (default: localhost)')
parser.add_argument('--pg-port', default='5432', help='PostgreSQL port (default: 5432)')
parser.add_argument('--pg-database', required=True, help='PostgreSQL database name')
parser.add_argument('--pg-user', required=True, help='PostgreSQL username')
parser.add_argument('--pg-password', required=True, help='PostgreSQL password')
parser.add_argument('--sample-size', type=int, default=100, help='Number of documents to sample for schema analysis (default: 100)')
parser.add_argument('--batch-size', type=int, default=1000, help='Batch size for data insertion (default: 1000)')
parser.add_argument('--debug', action='store_true', help='Enable debug mode with detailed error logging')
parser.add_argument('--debug-collections', nargs='*', help='Specific collections to debug (e.g., rocketchat_settings rocketchat_room)')
args = parser.parse_args()
postgres_config = {
'host': args.pg_host,
'port': args.pg_port,
'database': args.pg_database,
'user': args.pg_user,
'password': args.pg_password
}
# Enable debug mode if debug collections are specified
debug_mode = args.debug or (args.debug_collections is not None)
converter = MongoToPostgresConverter(args.mongo_path, postgres_config, debug_mode, args.debug_collections)
converter.run_conversion(args.sample_size, args.batch_size)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,41 @@
-- PostgreSQL Database Reset Script for Rocket.Chat Import
-- Run as: sudo -u postgres psql -f reset_database.sql
-- Terminate all connections to the database (force disconnect users)
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = 'rocketchat_converted' AND pid <> pg_backend_pid();
-- Drop the database if it exists
DROP DATABASE IF EXISTS rocketchat_converted;
-- Create fresh database
CREATE DATABASE rocketchat_converted;
-- Create user (if not exists)
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_user WHERE usename = 'rocketchat_user') THEN
CREATE USER rocketchat_user WITH PASSWORD 'HKjLgt23gWuPXzEAn3rW';
END IF;
END $$;
-- Grant database privileges
GRANT CONNECT ON DATABASE rocketchat_converted TO rocketchat_user;
GRANT CREATE ON DATABASE rocketchat_converted TO rocketchat_user;
-- Connect to the new database
\c rocketchat_converted;
-- Grant schema privileges
GRANT CREATE ON SCHEMA public TO rocketchat_user;
GRANT USAGE ON SCHEMA public TO rocketchat_user;
-- Grant privileges on all future tables and sequences
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO rocketchat_user;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT USAGE, SELECT ON SEQUENCES TO rocketchat_user;
-- Display success message
\echo 'Database reset completed successfully!'
\echo 'You can now run the converter with:'
\echo 'python3 mongo_to_postgres_converter.py --mongo-path db/database/62df06d44234d20001289144 --pg-database rocketchat_converted --pg-user rocketchat_user --pg-password your_password'

View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""
Quick test script to verify the converter fixes work for problematic collections
"""
from mongo_to_postgres_converter import MongoToPostgresConverter
def test_problematic_collections():
print("🧪 Testing converter fixes for problematic collections...")
postgres_config = {
'host': 'localhost',
'port': '5432',
'database': 'rocketchat_test',
'user': 'rocketchat_user',
'password': 'password123'
}
converter = MongoToPostgresConverter(
'db/database/62df06d44234d20001289144',
postgres_config,
debug_mode=True,
debug_collections=['rocketchat_settings', 'rocketchat_room']
)
# Test just discovery and schema analysis
print("\n1. Testing collection discovery...")
converter.discover_collections()
print("\n2. Testing schema analysis...")
if 'rocketchat_settings' in converter.collections:
settings_schema = converter.analyze_schema('rocketchat_settings', 10)
print(f"Settings schema fields: {len(settings_schema)}")
# Check specific problematic fields
if 'packageValue' in settings_schema:
packagevalue_info = settings_schema['packageValue']
pg_type = converter._determine_postgres_type(packagevalue_info)
print(f"packageValue types: {packagevalue_info['types']} -> PostgreSQL: {pg_type}")
if 'rocketchat_room' in converter.collections:
room_schema = converter.analyze_schema('rocketchat_room', 10)
print(f"Room schema fields: {len(room_schema)}")
# Check specific problematic fields
if 'sysMes' in room_schema:
sysmes_info = room_schema['sysMes']
pg_type = converter._determine_postgres_type(sysmes_info)
print(f"sysMes types: {sysmes_info['types']} -> PostgreSQL: {pg_type}")
print("\n✅ Test completed - check the type mappings above!")
if __name__ == '__main__':
test_problematic_collections()

1447
inventory-server/chat/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,20 @@
{
"name": "chat-server",
"version": "1.0.0",
"description": "Chat archive server for Rocket.Chat data",
"main": "server.js",
"scripts": {
"start": "node server.js",
"dev": "nodemon server.js"
},
"dependencies": {
"express": "^4.18.2",
"cors": "^2.8.5",
"pg": "^8.11.0",
"dotenv": "^16.0.3",
"morgan": "^1.10.0"
},
"devDependencies": {
"nodemon": "^2.0.22"
}
}

View File

@@ -0,0 +1,87 @@
const express = require('express');
const router = express.Router();
// Get all active users for the "view as" dropdown
router.get('/users', async (req, res) => {
try {
const result = await global.pool.query(`
SELECT id, username, name, type, active
FROM users
WHERE active = true AND type = 'user'
ORDER BY name ASC
`);
res.json({
status: 'success',
users: result.rows
});
} catch (error) {
console.error('Error fetching users:', error);
res.status(500).json({
status: 'error',
error: 'Failed to fetch users',
details: error.message
});
}
});
// Get rooms for a specific user
router.get('/users/:userId/rooms', async (req, res) => {
const { userId } = req.params;
try {
// Get rooms where the user is a member
const result = await global.pool.query(`
SELECT DISTINCT r.id, r.name, r.fname, r.t as type, r.msgs, r.lm as last_message_date
FROM room r, subscription s
WHERE s.rid = r.mongo_id
AND s.u->>'_id' = (SELECT mongo_id FROM users WHERE id = $1)
AND r.archived IS NOT TRUE
ORDER BY r.lm DESC NULLS LAST
LIMIT 50
`, [userId]);
res.json({
status: 'success',
rooms: result.rows
});
} catch (error) {
console.error('Error fetching user rooms:', error);
res.status(500).json({
status: 'error',
error: 'Failed to fetch user rooms',
details: error.message
});
}
});
// Get messages for a specific room
router.get('/rooms/:roomId/messages', async (req, res) => {
const { roomId } = req.params;
const { limit = 50, offset = 0 } = req.query;
try {
const result = await global.pool.query(`
SELECT m.id, m.msg, m.ts, m.u, m._updatedat
FROM message m
JOIN room r ON m.rid = r.mongo_id
WHERE r.id = $1
ORDER BY m.ts DESC
LIMIT $2 OFFSET $3
`, [roomId, limit, offset]);
res.json({
status: 'success',
messages: result.rows
});
} catch (error) {
console.error('Error fetching messages:', error);
res.status(500).json({
status: 'error',
error: 'Failed to fetch messages',
details: error.message
});
}
});
module.exports = router;

View File

@@ -0,0 +1,83 @@
require('dotenv').config({ path: '../.env' });
const express = require('express');
const cors = require('cors');
const { Pool } = require('pg');
const morgan = require('morgan');
const chatRoutes = require('./routes');
// Log startup configuration
console.log('Starting chat server with config:', {
host: process.env.CHAT_DB_HOST,
user: process.env.CHAT_DB_USER,
database: process.env.CHAT_DB_NAME || 'rocketchat_converted',
port: process.env.CHAT_DB_PORT,
chat_port: process.env.CHAT_PORT || 3014
});
const app = express();
const port = process.env.CHAT_PORT || 3014;
// Database configuration for rocketchat_converted database
const pool = new Pool({
host: process.env.CHAT_DB_HOST,
user: process.env.CHAT_DB_USER,
password: process.env.CHAT_DB_PASSWORD,
database: process.env.CHAT_DB_NAME || 'rocketchat_converted',
port: process.env.CHAT_DB_PORT,
});
// Make pool available globally
global.pool = pool;
// Middleware
app.use(express.json());
app.use(morgan('combined'));
app.use(cors({
origin: ['http://localhost:5175', 'http://localhost:5174', 'https://inventory.kent.pw'],
credentials: true
}));
// Test database connection endpoint
app.get('/test-db', async (req, res) => {
try {
const result = await pool.query('SELECT COUNT(*) as user_count FROM users WHERE active = true');
const messageResult = await pool.query('SELECT COUNT(*) as message_count FROM message');
const roomResult = await pool.query('SELECT COUNT(*) as room_count FROM room');
res.json({
status: 'success',
database: 'rocketchat_converted',
stats: {
active_users: parseInt(result.rows[0].user_count),
total_messages: parseInt(messageResult.rows[0].message_count),
total_rooms: parseInt(roomResult.rows[0].room_count)
}
});
} catch (error) {
console.error('Database test error:', error);
res.status(500).json({
status: 'error',
error: 'Database connection failed',
details: error.message
});
}
});
// Mount all routes from routes.js
app.use('/', chatRoutes);
// Health check endpoint
app.get('/health', (req, res) => {
res.json({ status: 'healthy' });
});
// Error handling middleware
app.use((err, req, res, next) => {
console.error(err.stack);
res.status(500).json({ error: 'Something broke!' });
});
// Start server
app.listen(port, () => {
console.log(`Chat server running on port ${port}`);
});