Put back files

This commit is contained in:
2025-10-04 16:14:09 -04:00
parent 4953355b91
commit e84c7e568f
166 changed files with 61620 additions and 259 deletions

View File

@@ -0,0 +1,45 @@
-- PostgreSQL Database Creation Script for New Server
-- Run as: sudo -u postgres psql -f create-new-database.sql
-- Terminate all connections to the database (if it exists)
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = 'rocketchat_converted' AND pid <> pg_backend_pid();
-- Drop the database if it exists
DROP DATABASE IF EXISTS rocketchat_converted;
-- Create fresh database
CREATE DATABASE rocketchat_converted;
-- Create user (if not exists) - UPDATE PASSWORD BEFORE RUNNING!
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_user WHERE usename = 'rocketchat_user') THEN
CREATE USER rocketchat_user WITH PASSWORD 'HKjLgt23gWuPXzEAn3rW';
END IF;
END $$;
-- Grant database privileges
GRANT CONNECT ON DATABASE rocketchat_converted TO rocketchat_user;
GRANT CREATE ON DATABASE rocketchat_converted TO rocketchat_user;
-- Connect to the new database
\c rocketchat_converted;
-- Grant schema privileges
GRANT CREATE ON SCHEMA public TO rocketchat_user;
GRANT USAGE ON SCHEMA public TO rocketchat_user;
-- Grant privileges on all future tables and sequences
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO rocketchat_user;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT USAGE, SELECT ON SEQUENCES TO rocketchat_user;
-- Display success message
\echo 'Database created successfully!'
\echo 'IMPORTANT: Update the password for rocketchat_user before proceeding'
\echo 'Next steps:'
\echo '1. Update the password in this file'
\echo '2. Run export-chat-data.sh on your current server'
\echo '3. Transfer the exported files to this server'
\echo '4. Run import-chat-data.sh on this server'

View File

@@ -0,0 +1,881 @@
#!/usr/bin/env python3
"""
MongoDB to PostgreSQL Converter for Rocket.Chat
Converts MongoDB BSON export files to PostgreSQL database
Usage:
python3 mongo_to_postgres_converter.py \
--mongo-path db/database/62df06d44234d20001289144 \
--pg-database rocketchat_converted \
--pg-user rocketchat_user \
--pg-password your_password \
--debug
"""
import json
import os
import re
import subprocess
import sys
import struct
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, List, Optional
import argparse
import traceback
# Auto-install dependencies if needed
try:
import bson
import psycopg2
except ImportError:
print("Installing required packages...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "pymongo", "psycopg2-binary"])
import bson
import psycopg2
class MongoToPostgresConverter:
def __init__(self, mongo_db_path: str, postgres_config: Dict[str, str], debug_mode: bool = False, debug_collections: List[str] = None):
self.mongo_db_path = Path(mongo_db_path)
self.postgres_config = postgres_config
self.debug_mode = debug_mode
self.debug_collections = debug_collections or []
self.collections = {}
self.schema_info = {}
self.error_log = {}
def log_debug(self, message: str, collection: str = None):
"""Log debug messages if debug mode is enabled and collection is in debug list"""
if self.debug_mode and (not self.debug_collections or collection in self.debug_collections):
print(f"DEBUG: {message}")
def log_error(self, collection: str, error_type: str, details: str):
"""Log detailed error information"""
if collection not in self.error_log:
self.error_log[collection] = []
self.error_log[collection].append({
'type': error_type,
'details': details,
'timestamp': datetime.now().isoformat()
})
def sample_documents(self, collection_name: str, max_samples: int = 3) -> List[Dict]:
"""Sample documents from a collection for debugging"""
if not self.debug_mode or (self.debug_collections and collection_name not in self.debug_collections):
return []
print(f"\n🔍 Sampling documents from {collection_name}:")
bson_file = self.collections[collection_name]['bson_file']
if bson_file.stat().st_size == 0:
print(" Collection is empty")
return []
samples = []
try:
with open(bson_file, 'rb') as f:
sample_count = 0
while sample_count < max_samples:
try:
doc_size = int.from_bytes(f.read(4), byteorder='little')
if doc_size <= 0:
break
f.seek(-4, 1)
doc_bytes = f.read(doc_size)
if len(doc_bytes) != doc_size:
break
doc = bson.decode(doc_bytes)
samples.append(doc)
sample_count += 1
print(f" Sample {sample_count} - Keys: {list(doc.keys())}")
# Show a few key fields with their types and truncated values
for key, value in list(doc.items())[:3]:
value_preview = str(value)[:50] + "..." if len(str(value)) > 50 else str(value)
print(f" {key}: {type(value).__name__} = {value_preview}")
if len(doc) > 3:
print(f" ... and {len(doc) - 3} more fields")
print()
except (bson.InvalidBSON, struct.error, OSError) as e:
self.log_error(collection_name, 'document_parsing', str(e))
break
except Exception as e:
self.log_error(collection_name, 'file_reading', str(e))
print(f" Error reading collection: {e}")
return samples
def discover_collections(self):
"""Discover all BSON files and their metadata"""
print("Discovering MongoDB collections...")
for bson_file in self.mongo_db_path.glob("*.bson"):
collection_name = bson_file.stem
metadata_file = bson_file.with_suffix(".metadata.json")
# Read metadata if available
metadata = {}
if metadata_file.exists():
try:
with open(metadata_file, 'r', encoding='utf-8') as f:
metadata = json.load(f)
except (UnicodeDecodeError, json.JSONDecodeError) as e:
print(f"Warning: Could not read metadata for {collection_name}: {e}")
metadata = {}
# Get file size and document count estimate
file_size = bson_file.stat().st_size
doc_count = self._estimate_document_count(bson_file)
self.collections[collection_name] = {
'bson_file': bson_file,
'metadata': metadata,
'file_size': file_size,
'estimated_docs': doc_count
}
print(f"Found {len(self.collections)} collections")
for name, info in self.collections.items():
print(f" - {name}: {info['file_size']/1024/1024:.1f}MB (~{info['estimated_docs']} docs)")
def _estimate_document_count(self, bson_file: Path) -> int:
"""Estimate document count by reading first few documents"""
if bson_file.stat().st_size == 0:
return 0
try:
with open(bson_file, 'rb') as f:
docs_sampled = 0
bytes_sampled = 0
max_sample_size = min(1024 * 1024, bson_file.stat().st_size) # 1MB or file size
while bytes_sampled < max_sample_size:
try:
doc_size = int.from_bytes(f.read(4), byteorder='little')
if doc_size <= 0 or doc_size > 16 * 1024 * 1024: # MongoDB doc size limit
break
f.seek(-4, 1) # Go back
doc_bytes = f.read(doc_size)
if len(doc_bytes) != doc_size:
break
bson.decode(doc_bytes) # Validate it's a valid BSON document
docs_sampled += 1
bytes_sampled += doc_size
except (bson.InvalidBSON, struct.error, OSError):
break
if docs_sampled > 0 and bytes_sampled > 0:
avg_doc_size = bytes_sampled / docs_sampled
return int(bson_file.stat().st_size / avg_doc_size)
except Exception:
pass
return 0
def analyze_schema(self, collection_name: str, sample_size: int = 100) -> Dict[str, Any]:
"""Analyze collection schema by sampling documents"""
print(f"Analyzing schema for {collection_name}...")
bson_file = self.collections[collection_name]['bson_file']
if bson_file.stat().st_size == 0:
return {}
schema = {}
docs_analyzed = 0
try:
with open(bson_file, 'rb') as f:
while docs_analyzed < sample_size:
try:
doc_size = int.from_bytes(f.read(4), byteorder='little')
if doc_size <= 0:
break
f.seek(-4, 1)
doc_bytes = f.read(doc_size)
if len(doc_bytes) != doc_size:
break
doc = bson.decode(doc_bytes)
self._analyze_document_schema(doc, schema)
docs_analyzed += 1
except (bson.InvalidBSON, struct.error, OSError):
break
except Exception as e:
print(f"Error analyzing {collection_name}: {e}")
self.schema_info[collection_name] = schema
return schema
def _analyze_document_schema(self, doc: Dict[str, Any], schema: Dict[str, Any], prefix: str = ""):
"""Recursively analyze document structure"""
for key, value in doc.items():
full_key = f"{prefix}.{key}" if prefix else key
if full_key not in schema:
schema[full_key] = {
'types': set(),
'null_count': 0,
'total_count': 0,
'is_array': False,
'nested_schema': {}
}
schema[full_key]['total_count'] += 1
if value is None:
schema[full_key]['null_count'] += 1
schema[full_key]['types'].add('null')
elif isinstance(value, dict):
schema[full_key]['types'].add('object')
if 'nested_schema' not in schema[full_key]:
schema[full_key]['nested_schema'] = {}
self._analyze_document_schema(value, schema[full_key]['nested_schema'])
elif isinstance(value, list):
schema[full_key]['types'].add('array')
schema[full_key]['is_array'] = True
if value and isinstance(value[0], dict):
if 'array_item_schema' not in schema[full_key]:
schema[full_key]['array_item_schema'] = {}
for item in value[:5]: # Sample first 5 items
if isinstance(item, dict):
self._analyze_document_schema(item, schema[full_key]['array_item_schema'])
else:
schema[full_key]['types'].add(type(value).__name__)
def generate_postgres_schema(self) -> Dict[str, str]:
"""Generate PostgreSQL CREATE TABLE statements"""
print("Generating PostgreSQL schema...")
table_definitions = {}
for collection_name, schema in self.schema_info.items():
if not schema: # Empty collection
continue
table_name = self._sanitize_table_name(collection_name)
columns = []
# Always add an id column (PostgreSQL doesn't use _id like MongoDB)
columns.append("id SERIAL PRIMARY KEY")
for field_name, field_info in schema.items():
if field_name == '_id':
columns.append("mongo_id TEXT") # Always allow NULL for mongo_id
continue
col_name = self._sanitize_column_name(field_name)
# Handle conflicts with PostgreSQL auto-generated columns
if col_name in ['id', 'mongo_id', 'created_at', 'updated_at']:
col_name = f"field_{col_name}"
col_type = self._determine_postgres_type(field_info)
# Make all fields nullable by default to avoid constraint violations
columns.append(f"{col_name} {col_type}")
# Add metadata columns
columns.extend([
"created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP",
"updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP"
])
column_definitions = ',\n '.join(columns)
table_sql = f"""
CREATE TABLE IF NOT EXISTS {table_name} (
{column_definitions}
);
-- Create indexes based on MongoDB indexes
"""
# Get list of actual columns that will exist in the table
existing_columns = set(['id', 'mongo_id', 'created_at', 'updated_at'])
for field_name in schema.keys():
if field_name != '_id':
col_name = self._sanitize_column_name(field_name)
# Handle conflicts with PostgreSQL auto-generated columns
if col_name in ['id', 'mongo_id', 'created_at', 'updated_at']:
col_name = f"field_{col_name}"
existing_columns.add(col_name)
# Add indexes from MongoDB metadata
metadata = self.collections[collection_name].get('metadata', {})
indexes = metadata.get('indexes', [])
for index in indexes:
if index['name'] != '_id_': # Skip the default _id index
# Sanitize index name - remove special characters
sanitized_index_name = re.sub(r'[^a-zA-Z0-9_]', '_', index['name'])
index_name = f"idx_{table_name}_{sanitized_index_name}"
index_keys = list(index['key'].keys())
if index_keys:
sanitized_keys = []
for key in index_keys:
if key != '_id':
sanitized_key = self._sanitize_column_name(key)
# Handle conflicts with PostgreSQL auto-generated columns
if sanitized_key in ['id', 'mongo_id', 'created_at', 'updated_at']:
sanitized_key = f"field_{sanitized_key}"
# Only add if the column actually exists in our table
if sanitized_key in existing_columns:
sanitized_keys.append(sanitized_key)
if sanitized_keys:
table_sql += f"CREATE INDEX IF NOT EXISTS {index_name} ON {table_name} ({', '.join(sanitized_keys)});\n"
table_definitions[collection_name] = table_sql
return table_definitions
def _sanitize_table_name(self, name: str) -> str:
"""Convert MongoDB collection name to PostgreSQL table name"""
# Remove rocketchat_ prefix if present
if name.startswith('rocketchat_'):
name = name[11:]
# Replace special characters with underscores
name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
# Ensure it starts with a letter
if name and name[0].isdigit():
name = 'table_' + name
return name.lower()
def _sanitize_column_name(self, name: str) -> str:
"""Convert MongoDB field name to PostgreSQL column name"""
# Handle nested field names (convert dots to underscores)
name = name.replace('.', '_')
# Replace special characters with underscores
name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
# Ensure it starts with a letter or underscore
if name and name[0].isdigit():
name = 'col_' + name
# Handle PostgreSQL reserved words
reserved = {
'user', 'order', 'group', 'table', 'index', 'key', 'value', 'date', 'time', 'timestamp',
'default', 'select', 'from', 'where', 'insert', 'update', 'delete', 'create', 'drop',
'alter', 'grant', 'revoke', 'commit', 'rollback', 'begin', 'end', 'case', 'when',
'then', 'else', 'if', 'null', 'not', 'and', 'or', 'in', 'exists', 'between',
'like', 'limit', 'offset', 'union', 'join', 'inner', 'outer', 'left', 'right',
'full', 'cross', 'natural', 'on', 'using', 'distinct', 'all', 'any', 'some',
'desc', 'asc', 'primary', 'foreign', 'references', 'constraint', 'unique',
'check', 'cascade', 'restrict', 'action', 'match', 'partial', 'full'
}
if name.lower() in reserved:
name = name + '_col'
return name.lower()
def _determine_postgres_type(self, field_info: Dict[str, Any]) -> str:
"""Determine PostgreSQL column type from MongoDB field analysis with improved logic"""
types = field_info['types']
# Convert set to list for easier checking
type_list = list(types)
# If there's only one type (excluding null), use specific typing
non_null_types = [t for t in type_list if t != 'null']
if len(non_null_types) == 1:
single_type = non_null_types[0]
if single_type == 'bool':
return 'BOOLEAN'
elif single_type == 'int':
return 'INTEGER'
elif single_type == 'float':
return 'NUMERIC'
elif single_type == 'str':
return 'TEXT'
elif single_type == 'datetime':
return 'TIMESTAMP'
elif single_type == 'ObjectId':
return 'TEXT'
# Handle mixed types more conservatively
if 'array' in types or field_info.get('is_array', False):
return 'JSONB' # Arrays always go to JSONB
elif 'object' in types:
return 'JSONB' # Objects always go to JSONB
elif len(non_null_types) > 1:
# Multiple non-null types - check for common combinations
if set(non_null_types) <= {'int', 'float'}:
return 'NUMERIC' # Can handle both int and float
elif set(non_null_types) <= {'bool', 'str'}:
return 'TEXT' # Convert everything to text
elif set(non_null_types) <= {'str', 'ObjectId'}:
return 'TEXT' # Both are string-like
else:
return 'JSONB' # Complex mixed types go to JSONB
elif 'ObjectId' in types:
return 'TEXT'
elif 'datetime' in types:
return 'TIMESTAMP'
elif 'bool' in types:
return 'BOOLEAN'
elif 'int' in types:
return 'INTEGER'
elif 'float' in types:
return 'NUMERIC'
elif 'str' in types:
return 'TEXT'
else:
return 'TEXT' # Default fallback
def create_postgres_database(self, table_definitions: Dict[str, str]):
"""Create PostgreSQL database and tables"""
print("Creating PostgreSQL database schema...")
try:
# Connect to PostgreSQL
conn = psycopg2.connect(**self.postgres_config)
conn.autocommit = True
cursor = conn.cursor()
# Create tables
for collection_name, table_sql in table_definitions.items():
print(f"Creating table for {collection_name}...")
cursor.execute(table_sql)
cursor.close()
conn.close()
print("Database schema created successfully!")
except Exception as e:
print(f"Error creating database schema: {e}")
raise
def convert_and_insert_data(self, batch_size: int = 1000):
"""Convert BSON data and insert into PostgreSQL"""
print("Converting and inserting data...")
try:
conn = psycopg2.connect(**self.postgres_config)
conn.autocommit = False
for collection_name in self.collections:
print(f"Processing {collection_name}...")
self._convert_collection(conn, collection_name, batch_size)
conn.close()
print("Data conversion completed successfully!")
except Exception as e:
print(f"Error converting data: {e}")
raise
def _convert_collection(self, conn, collection_name: str, batch_size: int):
"""Convert a single collection"""
bson_file = self.collections[collection_name]['bson_file']
if bson_file.stat().st_size == 0:
print(f" Skipping empty collection {collection_name}")
return
table_name = self._sanitize_table_name(collection_name)
cursor = conn.cursor()
batch = []
total_inserted = 0
errors = 0
try:
with open(bson_file, 'rb') as f:
while True:
try:
doc_size = int.from_bytes(f.read(4), byteorder='little')
if doc_size <= 0:
break
f.seek(-4, 1)
doc_bytes = f.read(doc_size)
if len(doc_bytes) != doc_size:
break
doc = bson.decode(doc_bytes)
batch.append(doc)
if len(batch) >= batch_size:
inserted, batch_errors = self._insert_batch(cursor, table_name, batch, collection_name)
total_inserted += inserted
errors += batch_errors
batch = []
conn.commit()
if total_inserted % 5000 == 0: # Less frequent progress updates
print(f" Inserted {total_inserted} documents...")
except (bson.InvalidBSON, struct.error, OSError):
break
# Insert remaining documents
if batch:
inserted, batch_errors = self._insert_batch(cursor, table_name, batch, collection_name)
total_inserted += inserted
errors += batch_errors
conn.commit()
if errors > 0:
print(f" Completed {collection_name}: {total_inserted} documents inserted ({errors} errors)")
else:
print(f" Completed {collection_name}: {total_inserted} documents inserted")
except Exception as e:
print(f" Error processing {collection_name}: {e}")
conn.rollback()
finally:
cursor.close()
def _insert_batch(self, cursor, table_name: str, documents: List[Dict], collection_name: str):
"""Insert a batch of documents with proper transaction handling"""
if not documents:
return 0, 0
# Get schema info for this collection
schema = self.schema_info.get(collection_name, {})
# Build column list
columns = ['mongo_id']
for field_name in schema.keys():
if field_name != '_id':
col_name = self._sanitize_column_name(field_name)
# Handle conflicts with PostgreSQL auto-generated columns
if col_name in ['id', 'mongo_id', 'created_at', 'updated_at']:
col_name = f"field_{col_name}"
columns.append(col_name)
# Build INSERT statement
placeholders = ', '.join(['%s'] * len(columns))
sql = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
self.log_debug(f"SQL: {sql}", collection_name)
# Convert documents to tuples
rows = []
errors = 0
for doc_idx, doc in enumerate(documents):
try:
row = []
# Add mongo_id
row.append(str(doc.get('_id', '')))
# Add other fields
for field_name in schema.keys():
if field_name != '_id':
try:
value = self._get_nested_value(doc, field_name)
converted_value = self._convert_value_for_postgres(value, field_name, schema)
row.append(converted_value)
except Exception as e:
self.log_error(collection_name, 'field_conversion',
f"Field '{field_name}' in doc {doc_idx}: {str(e)}")
# Only show debug for collections we're focusing on
if collection_name in self.debug_collections:
print(f" ⚠️ Error converting field '{field_name}': {e}")
row.append(None) # Use NULL for problematic fields
rows.append(tuple(row))
except Exception as e:
self.log_error(collection_name, 'document_conversion', f"Document {doc_idx}: {str(e)}")
errors += 1
continue
# Execute batch insert
if rows:
try:
cursor.executemany(sql, rows)
return len(rows), errors
except Exception as batch_error:
self.log_error(collection_name, 'batch_insert', str(batch_error))
# Only show detailed debugging for targeted collections
if collection_name in self.debug_collections:
print(f" 🔴 Batch insert failed for {collection_name}: {batch_error}")
print(" Trying individual inserts with rollback handling...")
# Rollback the failed transaction
cursor.connection.rollback()
# Try inserting one by one in individual transactions
success_count = 0
for row_idx, row in enumerate(rows):
try:
cursor.execute(sql, row)
cursor.connection.commit() # Commit each successful insert
success_count += 1
except Exception as row_error:
cursor.connection.rollback() # Rollback failed insert
self.log_error(collection_name, 'row_insert', f"Row {row_idx}: {str(row_error)}")
# Show detailed error only for the first few failures and only for targeted collections
if collection_name in self.debug_collections and errors < 3:
print(f" Row {row_idx} failed: {row_error}")
print(f" Row data: {len(row)} values, expected {len(columns)} columns")
errors += 1
continue
return success_count, errors
return 0, errors
def _get_nested_value(self, doc: Dict, field_path: str):
"""Get value from nested document using dot notation"""
keys = field_path.split('.')
value = doc
for key in keys:
if isinstance(value, dict) and key in value:
value = value[key]
else:
return None
return value
def _convert_value_for_postgres(self, value, field_name: str = None, schema: Dict = None):
"""Convert MongoDB value to PostgreSQL compatible value with schema-aware conversion"""
if value is None:
return None
# Get the expected PostgreSQL type for this field if available
expected_type = None
if schema and field_name and field_name in schema:
field_info = schema[field_name]
expected_type = self._determine_postgres_type(field_info)
# Handle conversion based on expected type
if expected_type == 'BOOLEAN':
if isinstance(value, bool):
return value
elif isinstance(value, str):
return value.lower() in ('true', '1', 'yes', 'on')
elif isinstance(value, (int, float)):
return bool(value)
else:
return None
elif expected_type == 'INTEGER':
if isinstance(value, int):
return value
elif isinstance(value, float):
return int(value)
elif isinstance(value, str) and value.isdigit():
return int(value)
elif isinstance(value, bool):
return int(value)
else:
return None
elif expected_type == 'NUMERIC':
if isinstance(value, (int, float)):
return value
elif isinstance(value, str):
try:
return float(value)
except ValueError:
return None
elif isinstance(value, bool):
return float(value)
else:
return None
elif expected_type == 'TEXT':
if isinstance(value, str):
return value
elif value is not None:
str_value = str(value)
# Handle very long strings
if len(str_value) > 65535:
return str_value[:65535]
return str_value
else:
return None
elif expected_type == 'TIMESTAMP':
if hasattr(value, 'isoformat'):
return value.isoformat()
elif isinstance(value, str):
return value
else:
return str(value) if value is not None else None
elif expected_type == 'JSONB':
if isinstance(value, (dict, list)):
return json.dumps(value, default=self._json_serializer)
elif isinstance(value, str):
# Check if it's already valid JSON
try:
json.loads(value)
return value
except (json.JSONDecodeError, TypeError):
# Not valid JSON, wrap it
return json.dumps(value)
else:
return json.dumps(value, default=self._json_serializer)
# Fallback to original logic if no expected type or type not recognized
if isinstance(value, bool):
return value
elif isinstance(value, (int, float)):
return value
elif isinstance(value, str):
return value
elif isinstance(value, (dict, list)):
return json.dumps(value, default=self._json_serializer)
elif hasattr(value, 'isoformat'): # datetime
return value.isoformat()
elif hasattr(value, '__str__'):
str_value = str(value)
if len(str_value) > 65535:
return str_value[:65535]
return str_value
else:
return str(value)
def _json_serializer(self, obj):
"""Custom JSON serializer for complex objects with better error handling"""
try:
if hasattr(obj, 'isoformat'): # datetime
return obj.isoformat()
elif hasattr(obj, '__str__'):
return str(obj)
else:
return None
except Exception as e:
self.log_debug(f"JSON serialization error: {e}")
return str(obj)
def run_conversion(self, sample_size: int = 100, batch_size: int = 1000):
"""Run the full conversion process with focused debugging"""
print("Starting MongoDB to PostgreSQL conversion...")
print("This will convert your Rocket.Chat database from MongoDB to PostgreSQL")
if self.debug_mode:
if self.debug_collections:
print(f"🐛 DEBUG MODE: Focusing on collections: {', '.join(self.debug_collections)}")
else:
print("🐛 DEBUG MODE: All collections")
print("=" * 70)
# Step 1: Discover collections
self.discover_collections()
# Step 2: Analyze schemas
print("\nAnalyzing collection schemas...")
for collection_name in self.collections:
self.analyze_schema(collection_name, sample_size)
# Sample problematic collections if debugging
if self.debug_mode and self.debug_collections:
for coll in self.debug_collections:
if coll in self.collections:
self.sample_documents(coll, 2)
# Step 3: Generate PostgreSQL schema
table_definitions = self.generate_postgres_schema()
# Step 4: Create database schema
self.create_postgres_database(table_definitions)
# Step 5: Convert and insert data
self.convert_and_insert_data(batch_size)
# Step 6: Show error summary
self._print_error_summary()
print("=" * 70)
print("✅ Conversion completed!")
print(f" Database: {self.postgres_config['database']}")
print(f" Tables created: {len(table_definitions)}")
def _print_error_summary(self):
"""Print a focused summary of errors"""
if not self.error_log:
print("\n✅ No errors encountered during conversion!")
return
print("\n⚠️ ERROR SUMMARY:")
print("=" * 50)
# Sort by error count descending
sorted_collections = sorted(self.error_log.items(),
key=lambda x: len(x[1]), reverse=True)
for collection, errors in sorted_collections:
error_types = {}
for error in errors:
error_type = error['type']
if error_type not in error_types:
error_types[error_type] = []
error_types[error_type].append(error['details'])
print(f"\n🔴 {collection} ({len(errors)} total errors):")
for error_type, details_list in error_types.items():
print(f" {error_type}: {len(details_list)} errors")
# Show sample errors for critical collections
if collection in ['rocketchat_settings', 'rocketchat_room'] and len(details_list) > 0:
print(f" Sample: {details_list[0][:100]}...")
def main():
parser = argparse.ArgumentParser(
description='Convert MongoDB BSON export to PostgreSQL',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Basic usage
python3 mongo_to_postgres_converter.py \\
--mongo-path db/database/62df06d44234d20001289144 \\
--pg-database rocketchat_converted \\
--pg-user rocketchat_user \\
--pg-password mypassword
# Debug specific failing collections
python3 mongo_to_postgres_converter.py \\
--mongo-path db/database/62df06d44234d20001289144 \\
--pg-database rocketchat_converted \\
--pg-user rocketchat_user \\
--pg-password mypassword \\
--debug-collections rocketchat_settings rocketchat_room
Before running this script:
1. Run: sudo -u postgres psql -f reset_database.sql
2. Update the password in reset_database.sql
"""
)
parser.add_argument('--mongo-path', required=True, help='Path to MongoDB export directory')
parser.add_argument('--pg-host', default='localhost', help='PostgreSQL host (default: localhost)')
parser.add_argument('--pg-port', default='5432', help='PostgreSQL port (default: 5432)')
parser.add_argument('--pg-database', required=True, help='PostgreSQL database name')
parser.add_argument('--pg-user', required=True, help='PostgreSQL username')
parser.add_argument('--pg-password', required=True, help='PostgreSQL password')
parser.add_argument('--sample-size', type=int, default=100, help='Number of documents to sample for schema analysis (default: 100)')
parser.add_argument('--batch-size', type=int, default=1000, help='Batch size for data insertion (default: 1000)')
parser.add_argument('--debug', action='store_true', help='Enable debug mode with detailed error logging')
parser.add_argument('--debug-collections', nargs='*', help='Specific collections to debug (e.g., rocketchat_settings rocketchat_room)')
args = parser.parse_args()
postgres_config = {
'host': args.pg_host,
'port': args.pg_port,
'database': args.pg_database,
'user': args.pg_user,
'password': args.pg_password
}
# Enable debug mode if debug collections are specified
debug_mode = args.debug or (args.debug_collections is not None)
converter = MongoToPostgresConverter(args.mongo_path, postgres_config, debug_mode, args.debug_collections)
converter.run_conversion(args.sample_size, args.batch_size)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,41 @@
-- PostgreSQL Database Reset Script for Rocket.Chat Import
-- Run as: sudo -u postgres psql -f reset_database.sql
-- Terminate all connections to the database (force disconnect users)
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = 'rocketchat_converted' AND pid <> pg_backend_pid();
-- Drop the database if it exists
DROP DATABASE IF EXISTS rocketchat_converted;
-- Create fresh database
CREATE DATABASE rocketchat_converted;
-- Create user (if not exists)
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_user WHERE usename = 'rocketchat_user') THEN
CREATE USER rocketchat_user WITH PASSWORD 'HKjLgt23gWuPXzEAn3rW';
END IF;
END $$;
-- Grant database privileges
GRANT CONNECT ON DATABASE rocketchat_converted TO rocketchat_user;
GRANT CREATE ON DATABASE rocketchat_converted TO rocketchat_user;
-- Connect to the new database
\c rocketchat_converted;
-- Grant schema privileges
GRANT CREATE ON SCHEMA public TO rocketchat_user;
GRANT USAGE ON SCHEMA public TO rocketchat_user;
-- Grant privileges on all future tables and sequences
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO rocketchat_user;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT USAGE, SELECT ON SEQUENCES TO rocketchat_user;
-- Display success message
\echo 'Database reset completed successfully!'
\echo 'You can now run the converter with:'
\echo 'python3 mongo_to_postgres_converter.py --mongo-path db/database/62df06d44234d20001289144 --pg-database rocketchat_converted --pg-user rocketchat_user --pg-password your_password'

View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""
Quick test script to verify the converter fixes work for problematic collections
"""
from mongo_to_postgres_converter import MongoToPostgresConverter
def test_problematic_collections():
print("🧪 Testing converter fixes for problematic collections...")
postgres_config = {
'host': 'localhost',
'port': '5432',
'database': 'rocketchat_test',
'user': 'rocketchat_user',
'password': 'password123'
}
converter = MongoToPostgresConverter(
'db/database/62df06d44234d20001289144',
postgres_config,
debug_mode=True,
debug_collections=['rocketchat_settings', 'rocketchat_room']
)
# Test just discovery and schema analysis
print("\n1. Testing collection discovery...")
converter.discover_collections()
print("\n2. Testing schema analysis...")
if 'rocketchat_settings' in converter.collections:
settings_schema = converter.analyze_schema('rocketchat_settings', 10)
print(f"Settings schema fields: {len(settings_schema)}")
# Check specific problematic fields
if 'packageValue' in settings_schema:
packagevalue_info = settings_schema['packageValue']
pg_type = converter._determine_postgres_type(packagevalue_info)
print(f"packageValue types: {packagevalue_info['types']} -> PostgreSQL: {pg_type}")
if 'rocketchat_room' in converter.collections:
room_schema = converter.analyze_schema('rocketchat_room', 10)
print(f"Room schema fields: {len(room_schema)}")
# Check specific problematic fields
if 'sysMes' in room_schema:
sysmes_info = room_schema['sysMes']
pg_type = converter._determine_postgres_type(sysmes_info)
print(f"sysMes types: {sysmes_info['types']} -> PostgreSQL: {pg_type}")
print("\n✅ Test completed - check the type mappings above!")
if __name__ == '__main__':
test_problematic_collections()

View File

@@ -0,0 +1,147 @@
#!/bin/bash
# Chat Database Export Script
# This script exports the chat database schema and data for migration
set -e # Exit on any error
echo "🚀 Starting chat database export..."
# Configuration - Update these values for your setup
DB_HOST="${CHAT_DB_HOST:-localhost}"
DB_PORT="${CHAT_DB_PORT:-5432}"
DB_NAME="${CHAT_DB_NAME:-rocketchat_converted}"
DB_USER="${CHAT_DB_USER:-rocketchat_user}"
# Check if database connection info is available
if [ -z "$CHAT_DB_PASSWORD" ]; then
echo "⚠️ CHAT_DB_PASSWORD environment variable not set"
echo "Please set it with: export CHAT_DB_PASSWORD='your_password'"
exit 1
fi
echo "📊 Database: $DB_NAME on $DB_HOST:$DB_PORT"
# Create export directory
EXPORT_DIR="chat-migration-$(date +%Y%m%d-%H%M%S)"
mkdir -p "$EXPORT_DIR"
echo "📁 Export directory: $EXPORT_DIR"
# Export database schema
echo "📋 Exporting database schema..."
PGPASSWORD="$CHAT_DB_PASSWORD" pg_dump \
-h "$DB_HOST" \
-p "$DB_PORT" \
-U "$DB_USER" \
-d "$DB_NAME" \
--schema-only \
--no-owner \
--no-privileges \
-f "$EXPORT_DIR/chat-schema.sql"
if [ $? -eq 0 ]; then
echo "✅ Schema exported successfully"
else
echo "❌ Schema export failed"
exit 1
fi
# Export database data
echo "💾 Exporting database data..."
PGPASSWORD="$CHAT_DB_PASSWORD" pg_dump \
-h "$DB_HOST" \
-p "$DB_PORT" \
-U "$DB_USER" \
-d "$DB_NAME" \
--data-only \
--no-owner \
--no-privileges \
--disable-triggers \
--column-inserts \
-f "$EXPORT_DIR/chat-data.sql"
if [ $? -eq 0 ]; then
echo "✅ Data exported successfully"
else
echo "❌ Data export failed"
exit 1
fi
# Export file uploads and avatars
echo "📎 Exporting chat files (uploads and avatars)..."
if [ -d "db-convert/db/files" ]; then
cd db-convert/db
tar -czf "../../$EXPORT_DIR/chat-files.tar.gz" files/
cd ../..
echo "✅ Files exported successfully"
else
echo "⚠️ No files directory found at db-convert/db/files"
echo " This is normal if you have no file uploads"
touch "$EXPORT_DIR/chat-files.tar.gz"
fi
# Get table statistics for verification
echo "📈 Generating export statistics..."
PGPASSWORD="$CHAT_DB_PASSWORD" psql \
-h "$DB_HOST" \
-p "$DB_PORT" \
-U "$DB_USER" \
-d "$DB_NAME" \
-c "
SELECT
schemaname,
tablename,
n_tup_ins as inserted_rows,
n_tup_upd as updated_rows,
n_tup_del as deleted_rows,
n_live_tup as live_rows,
n_dead_tup as dead_rows
FROM pg_stat_user_tables
ORDER BY n_live_tup DESC;
" > "$EXPORT_DIR/table-stats.txt"
# Create export summary
cat > "$EXPORT_DIR/export-summary.txt" << EOF
Chat Database Export Summary
===========================
Export Date: $(date)
Database: $DB_NAME
Host: $DB_HOST:$DB_PORT
User: $DB_USER
Files Generated:
- chat-schema.sql: Database schema (tables, indexes, constraints)
- chat-data.sql: All table data
- chat-files.tar.gz: Uploaded files and avatars
- table-stats.txt: Database statistics
- export-summary.txt: This summary
Next Steps:
1. Transfer these files to your new server
2. Run create-new-database.sql on the new server first
3. Run import-chat-data.sh on the new server
4. Update your application configuration
5. Run verify-migration.js to validate the migration
Important Notes:
- Keep these files secure as they contain your chat data
- Ensure the new server has enough disk space
- Plan for application downtime during the migration
EOF
echo ""
echo "🎉 Export completed successfully!"
echo "📁 Files are in: $EXPORT_DIR/"
echo ""
echo "📋 Export Summary:"
ls -lh "$EXPORT_DIR/"
echo ""
echo "🚚 Next steps:"
echo "1. Transfer the $EXPORT_DIR/ directory to your new server"
echo "2. Run create-new-database.sql on the new server (update password first!)"
echo "3. Run import-chat-data.sh on the new server"
echo ""
echo "💡 To transfer files to new server:"
echo " scp -r $EXPORT_DIR/ user@new-server:/tmp/"

View File

@@ -0,0 +1,167 @@
#!/bin/bash
# Chat Database Import Script
# This script imports the chat database schema and data on the new server
set -e # Exit on any error
echo "🚀 Starting chat database import..."
# Configuration - Update these values for your new server
DB_HOST="${CHAT_DB_HOST:-localhost}"
DB_PORT="${CHAT_DB_PORT:-5432}"
DB_NAME="${CHAT_DB_NAME:-rocketchat_converted}"
DB_USER="${CHAT_DB_USER:-rocketchat_user}"
# Check if database connection info is available
if [ -z "$CHAT_DB_PASSWORD" ]; then
echo "⚠️ CHAT_DB_PASSWORD environment variable not set"
echo "Please set it with: export CHAT_DB_PASSWORD='your_password'"
exit 1
fi
# Find the migration directory
MIGRATION_DIR=""
if [ -d "/tmp" ]; then
MIGRATION_DIR=$(find /tmp -maxdepth 1 -name "chat-migration-*" -type d | head -1)
fi
if [ -z "$MIGRATION_DIR" ]; then
echo "❌ No migration directory found in /tmp/"
echo "Please specify the migration directory:"
read -p "Enter full path to migration directory: " MIGRATION_DIR
fi
if [ ! -d "$MIGRATION_DIR" ]; then
echo "❌ Migration directory not found: $MIGRATION_DIR"
exit 1
fi
echo "📁 Using migration directory: $MIGRATION_DIR"
echo "📊 Target database: $DB_NAME on $DB_HOST:$DB_PORT"
# Verify required files exist
REQUIRED_FILES=("chat-schema.sql" "chat-data.sql" "chat-files.tar.gz")
for file in "${REQUIRED_FILES[@]}"; do
if [ ! -f "$MIGRATION_DIR/$file" ]; then
echo "❌ Required file not found: $MIGRATION_DIR/$file"
exit 1
fi
done
echo "✅ All required files found"
# Test database connection
echo "🔗 Testing database connection..."
PGPASSWORD="$CHAT_DB_PASSWORD" psql \
-h "$DB_HOST" \
-p "$DB_PORT" \
-U "$DB_USER" \
-d "$DB_NAME" \
-c "SELECT version();" > /dev/null
if [ $? -eq 0 ]; then
echo "✅ Database connection successful"
else
echo "❌ Database connection failed"
echo "Please ensure:"
echo " 1. PostgreSQL is running"
echo " 2. Database '$DB_NAME' exists"
echo " 3. User '$DB_USER' has access"
echo " 4. Password is correct"
exit 1
fi
# Import database schema
echo "📋 Importing database schema..."
PGPASSWORD="$CHAT_DB_PASSWORD" psql \
-h "$DB_HOST" \
-p "$DB_PORT" \
-U "$DB_USER" \
-d "$DB_NAME" \
-f "$MIGRATION_DIR/chat-schema.sql"
if [ $? -eq 0 ]; then
echo "✅ Schema imported successfully"
else
echo "❌ Schema import failed"
exit 1
fi
# Import database data
echo "💾 Importing database data..."
echo " This may take a while depending on data size..."
PGPASSWORD="$CHAT_DB_PASSWORD" psql \
-h "$DB_HOST" \
-p "$DB_PORT" \
-U "$DB_USER" \
-d "$DB_NAME" \
-f "$MIGRATION_DIR/chat-data.sql"
if [ $? -eq 0 ]; then
echo "✅ Data imported successfully"
else
echo "❌ Data import failed"
echo "Check the error messages above for details"
exit 1
fi
# Create files directory and import files
echo "📎 Setting up files directory..."
mkdir -p "db-convert/db"
if [ -s "$MIGRATION_DIR/chat-files.tar.gz" ]; then
echo "📂 Extracting chat files..."
cd db-convert/db
tar -xzf "$MIGRATION_DIR/chat-files.tar.gz"
cd ../..
# Set proper permissions
if [ -d "db-convert/db/files" ]; then
chmod -R 755 db-convert/db/files
echo "✅ Files imported and permissions set"
else
echo "⚠️ Files directory not created properly"
fi
else
echo " No files to import (empty archive)"
mkdir -p "db-convert/db/files/uploads"
mkdir -p "db-convert/db/files/avatars"
fi
# Get final table statistics
echo "📈 Generating import statistics..."
PGPASSWORD="$CHAT_DB_PASSWORD" psql \
-h "$DB_HOST" \
-p "$DB_PORT" \
-U "$DB_USER" \
-d "$DB_NAME" \
-c "
SELECT
tablename,
n_live_tup as row_count
FROM pg_stat_user_tables
WHERE schemaname = 'public'
ORDER BY n_live_tup DESC;
"
# Create import summary
echo ""
echo "🎉 Import completed successfully!"
echo ""
echo "📋 Import Summary:"
echo " Database: $DB_NAME"
echo " Host: $DB_HOST:$DB_PORT"
echo " Files location: $(pwd)/db-convert/db/files/"
echo ""
echo "🔍 Next steps:"
echo "1. Update your application configuration to use this database"
echo "2. Run verify-migration.js to validate the migration"
echo "3. Test your application thoroughly"
echo "4. Update DNS/load balancer to point to new server"
echo ""
echo "⚠️ Important:"
echo "- Keep the original data as backup until migration is fully validated"
echo "- Monitor the application closely after switching"
echo "- Have a rollback plan ready"

View File

@@ -0,0 +1,86 @@
# Chat Database Migration Guide
This guide will help you migrate your chat database from the current server to a new PostgreSQL server.
## Overview
Your chat system uses:
- Database: `rocketchat_converted` (PostgreSQL)
- Main tables: users, message, room, uploads, avatars, subscription
- File storage: db-convert/db/files/ directory with uploads and avatars
- Environment configuration for database connection
## Migration Steps
### 1. Pre-Migration Setup
On your **new server**, ensure PostgreSQL is installed and running:
```bash
# Install PostgreSQL (if not already done)
sudo apt update
sudo apt install postgresql postgresql-contrib
# Start PostgreSQL service
sudo systemctl start postgresql
sudo systemctl enable postgresql
```
### 2. Create Database Schema on New Server
Run the provided migration script:
```bash
# On new server
sudo -u postgres psql -f create-new-database.sql
```
### 3. Export Data from Current Server
Run the export script:
```bash
# On current server
./export-chat-data.sh
```
This will create:
- `chat-schema.sql` - Database schema
- `chat-data.sql` - All table data
- `chat-files.tar.gz` - All uploaded files and avatars
### 4. Transfer Data to New Server
```bash
# Copy files to new server
scp chat-schema.sql chat-data.sql chat-files.tar.gz user@new-server:/tmp/
```
### 5. Import Data on New Server
```bash
# On new server
./import-chat-data.sh
```
### 6. Update Configuration
Update your environment variables to point to the new database server.
### 7. Verify Migration
Run the verification script to ensure everything transferred correctly:
```bash
node verify-migration.js
```
## Files Provided
1. `create-new-database.sql` - Creates database and user on new server
2. `export-chat-data.sh` - Exports data from current server
3. `import-chat-data.sh` - Imports data to new server
4. `verify-migration.js` - Verifies data integrity
5. `update-config-template.env` - Template for new configuration
## Important Notes
- **Backup first**: Always backup your current database before migration
- **Downtime**: Plan for application downtime during migration
- **File permissions**: Ensure file permissions are preserved during transfer
- **Network access**: Ensure new server can accept connections from your application

1446
inventory-server/chat/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,20 @@
{
"name": "chat-server",
"version": "1.0.0",
"description": "Chat archive server for Rocket.Chat data",
"main": "server.js",
"scripts": {
"start": "node server.js",
"dev": "nodemon server.js"
},
"dependencies": {
"express": "^4.18.2",
"cors": "^2.8.5",
"pg": "^8.11.0",
"dotenv": "^16.0.3",
"morgan": "^1.10.0"
},
"devDependencies": {
"nodemon": "^2.0.22"
}
}

View File

@@ -0,0 +1,649 @@
const express = require('express');
const path = require('path');
const router = express.Router();
// Serve uploaded files with proper mapping from database paths to actual file locations
router.get('/files/uploads/*', async (req, res) => {
try {
// Extract the path from the URL (everything after /files/uploads/)
const requestPath = req.params[0];
// The URL path will be like: ufs/AmazonS3:Uploads/274Mf9CyHNG72oF86/filename.jpg
// We need to extract the mongo_id (274Mf9CyHNG72oF86) from this path
const pathParts = requestPath.split('/');
let mongoId = null;
// Find the mongo_id in the path structure
for (let i = 0; i < pathParts.length; i++) {
if (pathParts[i].includes('AmazonS3:Uploads') && i + 1 < pathParts.length) {
mongoId = pathParts[i + 1];
break;
}
// Sometimes the mongo_id might be the last part of ufs/AmazonS3:Uploads/mongoId
if (pathParts[i] === 'AmazonS3:Uploads' && i + 1 < pathParts.length) {
mongoId = pathParts[i + 1];
break;
}
}
if (!mongoId) {
// Try to get mongo_id from database by matching the full path
const result = await global.pool.query(`
SELECT mongo_id, name, type
FROM uploads
WHERE path = $1 OR url = $1
LIMIT 1
`, [`/ufs/AmazonS3:Uploads/${requestPath}`, `/ufs/AmazonS3:Uploads/${requestPath}`]);
if (result.rows.length > 0) {
mongoId = result.rows[0].mongo_id;
}
}
if (!mongoId) {
return res.status(404).json({ error: 'File not found' });
}
// The actual file is stored with just the mongo_id as filename
const filePath = path.join(__dirname, 'db-convert/db/files/uploads', mongoId);
// Get file info from database for proper content-type
const fileInfo = await global.pool.query(`
SELECT name, type
FROM uploads
WHERE mongo_id = $1
LIMIT 1
`, [mongoId]);
if (fileInfo.rows.length === 0) {
return res.status(404).json({ error: 'File metadata not found' });
}
const { name, type } = fileInfo.rows[0];
// Set proper content type
if (type) {
res.set('Content-Type', type);
}
// Set content disposition with original filename
if (name) {
res.set('Content-Disposition', `inline; filename="${name}"`);
}
// Send the file
res.sendFile(filePath, (err) => {
if (err) {
console.error('Error serving file:', err);
if (!res.headersSent) {
res.status(404).json({ error: 'File not found on disk' });
}
}
});
} catch (error) {
console.error('Error serving upload:', error);
res.status(500).json({ error: 'Server error' });
}
});
// Also serve files directly by mongo_id for simpler access
router.get('/files/by-id/:mongoId', async (req, res) => {
try {
const { mongoId } = req.params;
// Get file info from database
const fileInfo = await global.pool.query(`
SELECT name, type
FROM uploads
WHERE mongo_id = $1
LIMIT 1
`, [mongoId]);
if (fileInfo.rows.length === 0) {
return res.status(404).json({ error: 'File not found' });
}
const { name, type } = fileInfo.rows[0];
const filePath = path.join(__dirname, 'db-convert/db/files/uploads', mongoId);
// Set proper content type and filename
if (type) {
res.set('Content-Type', type);
}
if (name) {
res.set('Content-Disposition', `inline; filename="${name}"`);
}
// Send the file
res.sendFile(filePath, (err) => {
if (err) {
console.error('Error serving file:', err);
if (!res.headersSent) {
res.status(404).json({ error: 'File not found on disk' });
}
}
});
} catch (error) {
console.error('Error serving upload by ID:', error);
res.status(500).json({ error: 'Server error' });
}
});
// Serve user avatars by mongo_id
router.get('/avatar/:mongoId', async (req, res) => {
try {
const { mongoId } = req.params;
console.log(`[Avatar Debug] Looking up avatar for user mongo_id: ${mongoId}`);
// First try to find avatar by user's avataretag
const userResult = await global.pool.query(`
SELECT avataretag, username FROM users WHERE mongo_id = $1
`, [mongoId]);
let avatarPath = null;
if (userResult.rows.length > 0) {
const username = userResult.rows[0].username;
const avataretag = userResult.rows[0].avataretag;
// Try method 1: Look up by avataretag -> etag (for users with avataretag set)
if (avataretag) {
console.log(`[Avatar Debug] Found user ${username} with avataretag: ${avataretag}`);
const avatarResult = await global.pool.query(`
SELECT url, path FROM avatars WHERE etag = $1
`, [avataretag]);
if (avatarResult.rows.length > 0) {
const dbPath = avatarResult.rows[0].path || avatarResult.rows[0].url;
console.log(`[Avatar Debug] Found avatar record with path: ${dbPath}`);
if (dbPath) {
const pathParts = dbPath.split('/');
for (let i = 0; i < pathParts.length; i++) {
if (pathParts[i].includes('AmazonS3:Avatars') && i + 1 < pathParts.length) {
const avatarMongoId = pathParts[i + 1];
avatarPath = path.join(__dirname, 'db-convert/db/files/avatars', avatarMongoId);
console.log(`[Avatar Debug] Extracted avatar mongo_id: ${avatarMongoId}, full path: ${avatarPath}`);
break;
}
}
}
} else {
console.log(`[Avatar Debug] No avatar record found for etag: ${avataretag}`);
}
}
// Try method 2: Look up by userid directly (for users without avataretag)
if (!avatarPath) {
console.log(`[Avatar Debug] Trying direct userid lookup for user ${username} (${mongoId})`);
const avatarResult = await global.pool.query(`
SELECT url, path FROM avatars WHERE userid = $1
`, [mongoId]);
if (avatarResult.rows.length > 0) {
const dbPath = avatarResult.rows[0].path || avatarResult.rows[0].url;
console.log(`[Avatar Debug] Found avatar record by userid with path: ${dbPath}`);
if (dbPath) {
const pathParts = dbPath.split('/');
for (let i = 0; i < pathParts.length; i++) {
if (pathParts[i].includes('AmazonS3:Avatars') && i + 1 < pathParts.length) {
const avatarMongoId = pathParts[i + 1];
avatarPath = path.join(__dirname, 'db-convert/db/files/avatars', avatarMongoId);
console.log(`[Avatar Debug] Extracted avatar mongo_id: ${avatarMongoId}, full path: ${avatarPath}`);
break;
}
}
}
} else {
console.log(`[Avatar Debug] No avatar record found for userid: ${mongoId}`);
}
}
} else {
console.log(`[Avatar Debug] No user found for mongo_id: ${mongoId}`);
}
// Fallback: try direct lookup by user mongo_id
if (!avatarPath) {
avatarPath = path.join(__dirname, 'db-convert/db/files/avatars', mongoId);
console.log(`[Avatar Debug] Using fallback path: ${avatarPath}`);
}
// Set proper content type for images
res.set('Content-Type', 'image/jpeg'); // Most avatars are likely JPEG
// Send the file
res.sendFile(avatarPath, (err) => {
if (err) {
// If avatar doesn't exist, send a default 404 or generate initials
console.log(`[Avatar Debug] Avatar file not found at path: ${avatarPath}, error:`, err.message);
if (!res.headersSent) {
res.status(404).json({ error: 'Avatar not found' });
}
} else {
console.log(`[Avatar Debug] Successfully served avatar from: ${avatarPath}`);
}
});
} catch (error) {
console.error('Error serving avatar:', error);
res.status(500).json({ error: 'Server error' });
}
});
// Serve avatars statically as fallback
router.use('/files/avatars', express.static(path.join(__dirname, 'db-convert/db/files/avatars')));
// Get all users for the "view as" dropdown (active and inactive)
router.get('/users', async (req, res) => {
try {
const result = await global.pool.query(`
SELECT id, username, name, type, active, status, lastlogin,
statustext, utcoffset, statusconnection, mongo_id, avataretag
FROM users
WHERE type = 'user'
ORDER BY
active DESC, -- Active users first
CASE
WHEN status = 'online' THEN 1
WHEN status = 'away' THEN 2
WHEN status = 'busy' THEN 3
ELSE 4
END,
name ASC
`);
res.json({
status: 'success',
users: result.rows
});
} catch (error) {
console.error('Error fetching users:', error);
res.status(500).json({
status: 'error',
error: 'Failed to fetch users',
details: error.message
});
}
});
// Get rooms for a specific user with enhanced room names for direct messages
router.get('/users/:userId/rooms', async (req, res) => {
const { userId } = req.params;
try {
// Get the current user's mongo_id for filtering
const userResult = await global.pool.query(`
SELECT mongo_id, username FROM users WHERE id = $1
`, [userId]);
if (userResult.rows.length === 0) {
return res.status(404).json({
status: 'error',
error: 'User not found'
});
}
const currentUserMongoId = userResult.rows[0].mongo_id;
const currentUsername = userResult.rows[0].username;
// Get rooms where the user is a member with proper naming from subscription table
// Include archived and closed rooms but sort them at the bottom
const result = await global.pool.query(`
SELECT DISTINCT
r.id,
r.mongo_id as room_mongo_id,
r.name,
r.fname,
r.t as type,
r.msgs,
r.lm as last_message_date,
r.usernames,
r.uids,
r.userscount,
r.description,
r.teamid,
r.archived,
s.open,
-- Use the subscription's name for direct messages (excludes current user)
-- For channels/groups, use room's fname or name
CASE
WHEN r.t = 'd' THEN COALESCE(s.fname, s.name, 'Unknown User')
ELSE COALESCE(r.fname, r.name, 'Unnamed Room')
END as display_name
FROM room r
JOIN subscription s ON s.rid = r.mongo_id
WHERE s.u->>'_id' = $1
ORDER BY
s.open DESC NULLS LAST, -- Open rooms first
r.archived NULLS FIRST, -- Non-archived first (nulls treated as false)
r.lm DESC NULLS LAST
LIMIT 50
`, [currentUserMongoId]);
// Enhance rooms with participant information for direct messages
const enhancedRooms = await Promise.all(result.rows.map(async (room) => {
if (room.type === 'd' && room.uids) {
// Get participant info (excluding current user) for direct messages
const participantResult = await global.pool.query(`
SELECT u.username, u.name, u.mongo_id, u.avataretag
FROM users u
WHERE u.mongo_id = ANY($1::text[])
AND u.mongo_id != $2
`, [room.uids, currentUserMongoId]);
room.participants = participantResult.rows;
}
return room;
}));
res.json({
status: 'success',
rooms: enhancedRooms
});
} catch (error) {
console.error('Error fetching user rooms:', error);
res.status(500).json({
status: 'error',
error: 'Failed to fetch user rooms',
details: error.message
});
}
});
// Get room details including participants
router.get('/rooms/:roomId', async (req, res) => {
const { roomId } = req.params;
const { userId } = req.query; // Accept current user ID as query parameter
try {
const result = await global.pool.query(`
SELECT r.id, r.name, r.fname, r.t as type, r.msgs, r.description,
r.lm as last_message_date, r.usernames, r.uids, r.userscount, r.teamid
FROM room r
WHERE r.id = $1
`, [roomId]);
if (result.rows.length === 0) {
return res.status(404).json({
status: 'error',
error: 'Room not found'
});
}
const room = result.rows[0];
// For direct messages, get the proper display name based on current user
if (room.type === 'd' && room.uids && userId) {
// Get current user's mongo_id
const userResult = await global.pool.query(`
SELECT mongo_id FROM users WHERE id = $1
`, [userId]);
if (userResult.rows.length > 0) {
const currentUserMongoId = userResult.rows[0].mongo_id;
// Get display name from subscription table for this user
// Use room mongo_id to match with subscription.rid
const roomMongoResult = await global.pool.query(`
SELECT mongo_id FROM room WHERE id = $1
`, [roomId]);
if (roomMongoResult.rows.length > 0) {
const roomMongoId = roomMongoResult.rows[0].mongo_id;
const subscriptionResult = await global.pool.query(`
SELECT fname, name FROM subscription
WHERE rid = $1 AND u->>'_id' = $2
`, [roomMongoId, currentUserMongoId]);
if (subscriptionResult.rows.length > 0) {
const sub = subscriptionResult.rows[0];
room.display_name = sub.fname || sub.name || 'Unknown User';
}
}
}
// Get all participants for additional info
const participantResult = await global.pool.query(`
SELECT username, name
FROM users
WHERE mongo_id = ANY($1::text[])
`, [room.uids]);
room.participants = participantResult.rows;
} else {
// For channels/groups, use room's fname or name
room.display_name = room.fname || room.name || 'Unnamed Room';
}
res.json({
status: 'success',
room: room
});
} catch (error) {
console.error('Error fetching room details:', error);
res.status(500).json({
status: 'error',
error: 'Failed to fetch room details',
details: error.message
});
}
});
// Get messages for a specific room (fast, without attachments)
router.get('/rooms/:roomId/messages', async (req, res) => {
const { roomId } = req.params;
const { limit = 50, offset = 0, before } = req.query;
try {
// Fast query - just get messages without expensive attachment joins
let query = `
SELECT m.id, m.msg, m.ts, m.u, m._updatedat, m.urls, m.mentions, m.md
FROM message m
JOIN room r ON m.rid = r.mongo_id
WHERE r.id = $1
`;
const params = [roomId];
if (before) {
query += ` AND m.ts < $${params.length + 1}`;
params.push(before);
}
query += ` ORDER BY m.ts DESC LIMIT $${params.length + 1} OFFSET $${params.length + 2}`;
params.push(limit, offset);
const result = await global.pool.query(query, params);
// Add empty attachments array for now - attachments will be loaded separately if needed
const messages = result.rows.map(msg => ({
...msg,
attachments: []
}));
res.json({
status: 'success',
messages: messages.reverse() // Reverse to show oldest first
});
} catch (error) {
console.error('Error fetching messages:', error);
res.status(500).json({
status: 'error',
error: 'Failed to fetch messages',
details: error.message
});
}
});
// Get attachments for specific messages (called separately for performance)
router.post('/messages/attachments', async (req, res) => {
const { messageIds } = req.body;
if (!messageIds || !Array.isArray(messageIds) || messageIds.length === 0) {
return res.json({ status: 'success', attachments: {} });
}
try {
// Get room mongo_id from first message to limit search scope
const roomQuery = await global.pool.query(`
SELECT r.mongo_id as room_mongo_id
FROM message m
JOIN room r ON m.rid = r.mongo_id
WHERE m.id = $1
LIMIT 1
`, [messageIds[0]]);
if (roomQuery.rows.length === 0) {
return res.json({ status: 'success', attachments: {} });
}
const roomMongoId = roomQuery.rows[0].room_mongo_id;
// Get messages and their upload timestamps
const messagesQuery = await global.pool.query(`
SELECT m.id, m.ts, m.u->>'_id' as user_id
FROM message m
WHERE m.id = ANY($1::int[])
`, [messageIds]);
if (messagesQuery.rows.length === 0) {
return res.json({ status: 'success', attachments: {} });
}
// Build a map of user_id -> array of message timestamps for efficient lookup
const userTimeMap = {};
const messageMap = {};
messagesQuery.rows.forEach(msg => {
if (!userTimeMap[msg.user_id]) {
userTimeMap[msg.user_id] = [];
}
userTimeMap[msg.user_id].push(msg.ts);
messageMap[msg.id] = { ts: msg.ts, user_id: msg.user_id };
});
// Get attachments for this room and these users
const uploadsQuery = await global.pool.query(`
SELECT mongo_id, name, size, type, url, path, typegroup, identify,
userid, uploadedat
FROM uploads
WHERE rid = $1
AND userid = ANY($2::text[])
ORDER BY uploadedat
`, [roomMongoId, Object.keys(userTimeMap)]);
// Match attachments to messages based on timestamp proximity (within 5 minutes)
const attachmentsByMessage = {};
uploadsQuery.rows.forEach(upload => {
const uploadTime = new Date(upload.uploadedat).getTime();
// Find the closest message from this user within 5 minutes
let closestMessageId = null;
let closestTimeDiff = Infinity;
Object.entries(messageMap).forEach(([msgId, msgData]) => {
if (msgData.user_id === upload.userid) {
const msgTime = new Date(msgData.ts).getTime();
const timeDiff = Math.abs(uploadTime - msgTime);
if (timeDiff < 300000 && timeDiff < closestTimeDiff) { // 5 minutes = 300000ms
closestMessageId = msgId;
closestTimeDiff = timeDiff;
}
}
});
if (closestMessageId) {
if (!attachmentsByMessage[closestMessageId]) {
attachmentsByMessage[closestMessageId] = [];
}
attachmentsByMessage[closestMessageId].push({
id: upload.id,
mongo_id: upload.mongo_id,
name: upload.name,
size: upload.size,
type: upload.type,
url: upload.url,
path: upload.path,
typegroup: upload.typegroup,
identify: upload.identify
});
}
});
res.json({
status: 'success',
attachments: attachmentsByMessage
});
} catch (error) {
console.error('Error fetching message attachments:', error);
res.status(500).json({
status: 'error',
error: 'Failed to fetch attachments',
details: error.message
});
}
});
// Search messages in accessible rooms for a user
router.get('/users/:userId/search', async (req, res) => {
const { userId } = req.params;
const { q, limit = 20 } = req.query;
if (!q || q.length < 2) {
return res.status(400).json({
status: 'error',
error: 'Search query must be at least 2 characters'
});
}
try {
const userResult = await global.pool.query(`
SELECT mongo_id FROM users WHERE id = $1
`, [userId]);
if (userResult.rows.length === 0) {
return res.status(404).json({
status: 'error',
error: 'User not found'
});
}
const currentUserMongoId = userResult.rows[0].mongo_id;
const result = await global.pool.query(`
SELECT m.id, m.msg, m.ts, m.u, r.id as room_id, r.name as room_name, r.fname as room_fname, r.t as room_type
FROM message m
JOIN room r ON m.rid = r.mongo_id
JOIN subscription s ON s.rid = r.mongo_id AND s.u->>'_id' = $1
WHERE m.msg ILIKE $2
AND r.archived IS NOT TRUE
ORDER BY m.ts DESC
LIMIT $3
`, [currentUserMongoId, `%${q}%`, limit]);
res.json({
status: 'success',
results: result.rows
});
} catch (error) {
console.error('Error searching messages:', error);
res.status(500).json({
status: 'error',
error: 'Failed to search messages',
details: error.message
});
}
});
module.exports = router;

View File

@@ -0,0 +1,83 @@
require('dotenv').config({ path: '../.env' });
const express = require('express');
const cors = require('cors');
const { Pool } = require('pg');
const morgan = require('morgan');
const chatRoutes = require('./routes');
// Log startup configuration
console.log('Starting chat server with config:', {
host: process.env.CHAT_DB_HOST,
user: process.env.CHAT_DB_USER,
database: process.env.CHAT_DB_NAME || 'rocketchat_converted',
port: process.env.CHAT_DB_PORT,
chat_port: process.env.CHAT_PORT || 3014
});
const app = express();
const port = process.env.CHAT_PORT || 3014;
// Database configuration for rocketchat_converted database
const pool = new Pool({
host: process.env.CHAT_DB_HOST,
user: process.env.CHAT_DB_USER,
password: process.env.CHAT_DB_PASSWORD,
database: process.env.CHAT_DB_NAME || 'rocketchat_converted',
port: process.env.CHAT_DB_PORT,
});
// Make pool available globally
global.pool = pool;
// Middleware
app.use(express.json());
app.use(morgan('combined'));
app.use(cors({
origin: ['http://localhost:5175', 'http://localhost:5174', 'https://inventory.kent.pw', 'https://acot.site'],
credentials: true
}));
// Test database connection endpoint
app.get('/test-db', async (req, res) => {
try {
const result = await pool.query('SELECT COUNT(*) as user_count FROM users WHERE active = true');
const messageResult = await pool.query('SELECT COUNT(*) as message_count FROM message');
const roomResult = await pool.query('SELECT COUNT(*) as room_count FROM room');
res.json({
status: 'success',
database: 'rocketchat_converted',
stats: {
active_users: parseInt(result.rows[0].user_count),
total_messages: parseInt(messageResult.rows[0].message_count),
total_rooms: parseInt(roomResult.rows[0].room_count)
}
});
} catch (error) {
console.error('Database test error:', error);
res.status(500).json({
status: 'error',
error: 'Database connection failed',
details: error.message
});
}
});
// Mount all routes from routes.js
app.use('/', chatRoutes);
// Health check endpoint
app.get('/health', (req, res) => {
res.json({ status: 'healthy' });
});
// Error handling middleware
app.use((err, req, res, next) => {
console.error(err.stack);
res.status(500).json({ error: 'Something broke!' });
});
// Start server
app.listen(port, () => {
console.log(`Chat server running on port ${port}`);
});

View File

@@ -0,0 +1,26 @@
# Chat Server Database Configuration Template
# Copy this to your .env file and update the values for your new server
# Database Configuration for New Server
CHAT_DB_HOST=your-new-server-ip-or-hostname
CHAT_DB_PORT=5432
CHAT_DB_NAME=rocketchat_converted
CHAT_DB_USER=rocketchat_user
CHAT_DB_PASSWORD=your-secure-password
# Chat Server Port
CHAT_PORT=3014
# Example configuration:
# CHAT_DB_HOST=192.168.1.100
# CHAT_DB_PORT=5432
# CHAT_DB_NAME=rocketchat_converted
# CHAT_DB_USER=rocketchat_user
# CHAT_DB_PASSWORD=MySecureP@ssw0rd123
# Notes:
# - Replace 'your-new-server-ip-or-hostname' with actual server address
# - Use a strong password for CHAT_DB_PASSWORD
# - Ensure the new server allows connections from your application server
# - Update any firewall rules to allow PostgreSQL connections (port 5432)
# - Test connectivity before updating production configuration

View File

@@ -0,0 +1,231 @@
#!/usr/bin/env node
/**
* Chat Database Migration Verification Script
*
* This script verifies that the chat database migration was successful
* by comparing record counts and testing basic functionality.
*/
require('dotenv').config({ path: '../.env' });
const { Pool } = require('pg');
// Database configuration
const pool = new Pool({
host: process.env.CHAT_DB_HOST || 'localhost',
user: process.env.CHAT_DB_USER || 'rocketchat_user',
password: process.env.CHAT_DB_PASSWORD,
database: process.env.CHAT_DB_NAME || 'rocketchat_converted',
port: process.env.CHAT_DB_PORT || 5432,
});
const originalStats = process.argv[2] ? JSON.parse(process.argv[2]) : null;
async function verifyMigration() {
console.log('🔍 Starting migration verification...\n');
try {
// Test basic connection
console.log('🔗 Testing database connection...');
const versionResult = await pool.query('SELECT version()');
console.log('✅ Database connection successful');
console.log(` PostgreSQL version: ${versionResult.rows[0].version.split(' ')[1]}\n`);
// Get table statistics
console.log('📊 Checking table statistics...');
const statsResult = await pool.query(`
SELECT
tablename,
n_live_tup as row_count,
n_dead_tup as dead_rows,
schemaname
FROM pg_stat_user_tables
WHERE schemaname = 'public'
ORDER BY n_live_tup DESC
`);
if (statsResult.rows.length === 0) {
console.log('❌ No tables found! Migration may have failed.');
return false;
}
console.log('📋 Table Statistics:');
console.log(' Table Name | Row Count | Dead Rows');
console.log(' -------------------|-----------|----------');
let totalRows = 0;
const tableStats = {};
for (const row of statsResult.rows) {
const rowCount = parseInt(row.row_count) || 0;
const deadRows = parseInt(row.dead_rows) || 0;
totalRows += rowCount;
tableStats[row.tablename] = rowCount;
console.log(` ${row.tablename.padEnd(18)} | ${rowCount.toString().padStart(9)} | ${deadRows.toString().padStart(8)}`);
}
console.log(`\n Total rows across all tables: ${totalRows}\n`);
// Verify critical tables exist and have data
const criticalTables = ['users', 'message', 'room'];
console.log('🔑 Checking critical tables...');
for (const table of criticalTables) {
if (tableStats[table] > 0) {
console.log(`${table}: ${tableStats[table]} rows`);
} else if (tableStats[table] === 0) {
console.log(`⚠️ ${table}: table exists but is empty`);
} else {
console.log(`${table}: table not found`);
return false;
}
}
// Test specific functionality
console.log('\n🧪 Testing specific functionality...');
// Test users table
const userTest = await pool.query(`
SELECT COUNT(*) as total_users,
COUNT(*) FILTER (WHERE active = true) as active_users,
COUNT(*) FILTER (WHERE type = 'user') as regular_users
FROM users
`);
if (userTest.rows[0]) {
const { total_users, active_users, regular_users } = userTest.rows[0];
console.log(`✅ Users: ${total_users} total, ${active_users} active, ${regular_users} regular users`);
}
// Test messages table
const messageTest = await pool.query(`
SELECT COUNT(*) as total_messages,
COUNT(DISTINCT rid) as unique_rooms,
MIN(ts) as oldest_message,
MAX(ts) as newest_message
FROM message
`);
if (messageTest.rows[0]) {
const { total_messages, unique_rooms, oldest_message, newest_message } = messageTest.rows[0];
console.log(`✅ Messages: ${total_messages} total across ${unique_rooms} rooms`);
if (oldest_message && newest_message) {
console.log(` Date range: ${oldest_message.toISOString().split('T')[0]} to ${newest_message.toISOString().split('T')[0]}`);
}
}
// Test rooms table
const roomTest = await pool.query(`
SELECT COUNT(*) as total_rooms,
COUNT(*) FILTER (WHERE t = 'c') as channels,
COUNT(*) FILTER (WHERE t = 'p') as private_groups,
COUNT(*) FILTER (WHERE t = 'd') as direct_messages
FROM room
`);
if (roomTest.rows[0]) {
const { total_rooms, channels, private_groups, direct_messages } = roomTest.rows[0];
console.log(`✅ Rooms: ${total_rooms} total (${channels} channels, ${private_groups} private, ${direct_messages} DMs)`);
}
// Test file uploads if table exists
if (tableStats.uploads > 0) {
const uploadTest = await pool.query(`
SELECT COUNT(*) as total_uploads,
COUNT(DISTINCT typegroup) as file_types,
pg_size_pretty(SUM(size)) as total_size
FROM uploads
WHERE size IS NOT NULL
`);
if (uploadTest.rows[0]) {
const { total_uploads, file_types, total_size } = uploadTest.rows[0];
console.log(`✅ Uploads: ${total_uploads} files, ${file_types} types, ${total_size || 'unknown size'}`);
}
}
// Test server health endpoint simulation
console.log('\n🏥 Testing application endpoints simulation...');
try {
const healthTest = await pool.query(`
SELECT
(SELECT COUNT(*) FROM users WHERE active = true) as active_users,
(SELECT COUNT(*) FROM message) as total_messages,
(SELECT COUNT(*) FROM room) as total_rooms
`);
if (healthTest.rows[0]) {
const stats = healthTest.rows[0];
console.log('✅ Health check simulation passed');
console.log(` Active users: ${stats.active_users}`);
console.log(` Total messages: ${stats.total_messages}`);
console.log(` Total rooms: ${stats.total_rooms}`);
}
} catch (error) {
console.log(`⚠️ Health check simulation failed: ${error.message}`);
}
// Check indexes
console.log('\n📇 Checking database indexes...');
const indexResult = await pool.query(`
SELECT
schemaname,
tablename,
indexname,
indexdef
FROM pg_indexes
WHERE schemaname = 'public'
ORDER BY tablename, indexname
`);
const indexesByTable = {};
for (const idx of indexResult.rows) {
if (!indexesByTable[idx.tablename]) {
indexesByTable[idx.tablename] = [];
}
indexesByTable[idx.tablename].push(idx.indexname);
}
for (const [table, indexes] of Object.entries(indexesByTable)) {
console.log(` ${table}: ${indexes.length} indexes`);
}
console.log('\n🎉 Migration verification completed successfully!');
console.log('\n✅ Summary:');
console.log(` - Database connection: Working`);
console.log(` - Tables created: ${statsResult.rows.length}`);
console.log(` - Total data rows: ${totalRows}`);
console.log(` - Critical tables: All present`);
console.log(` - Indexes: ${indexResult.rows.length} total`);
console.log('\n🚀 Next steps:');
console.log(' 1. Update your application configuration');
console.log(' 2. Start your chat server');
console.log(' 3. Test chat functionality in the browser');
console.log(' 4. Monitor logs for any issues');
return true;
} catch (error) {
console.error('❌ Migration verification failed:', error.message);
console.error('\n🔧 Troubleshooting steps:');
console.error(' 1. Check database connection settings');
console.error(' 2. Verify database and user exist');
console.error(' 3. Check PostgreSQL logs');
console.error(' 4. Ensure import completed without errors');
return false;
} finally {
await pool.end();
}
}
// Run verification
if (require.main === module) {
verifyMigration().then(success => {
process.exit(success ? 0 : 1);
});
}
module.exports = { verifyMigration };