#!/usr/bin/env python3
"""
Industry Standard Data Validation and Repair Tool
Comprehensive fix for ThreeFold Marketplace user data files
"""

import json
import os
import shutil
import sys
from pathlib import Path

# Legacy / alternative activity-type names and the variant each is rewritten to.
_ACTIVITY_TYPE_ALIASES = {
    "ServiceProgress": "ServiceCreated",
    "AppDeployment": "Deployment",
    "AppCreated": "AppPublished",
    "NodeCreated": "NodeAdded",
    "NodeModified": "NodeUpdated",
    "WalletDeposit": "WalletTransaction",
    "WalletWithdraw": "WalletTransaction",
    "Payment": "WalletTransaction",
    "ProfileChanged": "ProfileUpdate",
    "ConfigChange": "SettingsChange",
    "BrowseMarketplace": "MarketplaceView",
    "SliceCreation": "SliceCreated",
    "SliceAssignment": "SliceAllocated",
    "SliceRemoval": "SliceReleased",
}

# Activity types that are already valid and pass through unchanged.
_VALID_ACTIVITY_TYPES = frozenset({
    "Login",
    "Purchase",
    "Deployment",
    "ServiceCreated",
    "AppPublished",
    "NodeAdded",
    "NodeUpdated",
    "WalletTransaction",
    "ProfileUpdate",
    "SettingsChange",
    "MarketplaceView",
    "SliceCreated",
    "SliceAllocated",
    "SliceReleased",
})

# Used for any activity type that is neither valid nor a known alias.
_FALLBACK_ACTIVITY_TYPE = "ProfileUpdate"

# Category assigned to each (normalized) activity type.
_CATEGORY_BY_ACTIVITY_TYPE = {
    "ServiceCreated": "Service",
    "AppPublished": "App",
    "Deployment": "App",
    "NodeAdded": "Farming",
    "NodeUpdated": "Farming",
    "SliceCreated": "Farming",
    "SliceAllocated": "Farming",
    "SliceReleased": "Farming",
    "WalletTransaction": "Finance",
    "Login": "Account",
    "ProfileUpdate": "Account",
    "SettingsChange": "Account",
    "Purchase": "Marketplace",
    "MarketplaceView": "Marketplace",
}

# Used for any activity type without an entry in the table above.
_FALLBACK_CATEGORY = "General"


def normalize_activity_type(activity_type):
    """Normalize an activity type to one of the valid variant names.

    Args:
        activity_type: The raw value read from the data file.

    Returns:
        The value itself when it is already a valid variant, the mapped
        variant when it is a known alias, and "ProfileUpdate" otherwise
        (including for non-string values).
    """
    # JSON may hold a list/object here; those are unhashable and would make
    # the set/dict lookups below raise TypeError, so route them to the fallback.
    if not isinstance(activity_type, str):
        return _FALLBACK_ACTIVITY_TYPE
    if activity_type in _VALID_ACTIVITY_TYPES:
        return activity_type
    return _ACTIVITY_TYPE_ALIASES.get(activity_type, _FALLBACK_ACTIVITY_TYPE)


def infer_category_from_activity_type(activity_type):
    """Infer the category for an activity type.

    Args:
        activity_type: An activity type, normally already normalized.

    Returns:
        The category string for known activity types, "General" otherwise
        (including for non-string values).
    """
    # Same unhashable-value guard as in normalize_activity_type.
    if not isinstance(activity_type, str):
        return _FALLBACK_CATEGORY
    return _CATEGORY_BY_ACTIVITY_TYPE.get(activity_type, _FALLBACK_CATEGORY)


def repair_user_activities(data):
    """Repair the ``user_activities`` entries of *data* in place.

    A missing or null ``user_activities`` becomes an empty list. For every
    non-null entry, a non-null ``activity_type`` is normalized and a missing
    ``category`` is inferred from the activity type.

    Args:
        data: The decoded user-data object (a dict); it is mutated.
    """
    if data.get("user_activities") is None:
        data["user_activities"] = []
        return

    for activity in data["user_activities"]:
        if activity is None:
            continue

        # Fix activity_type (a null value is left untouched).
        if activity.get("activity_type") is not None:
            activity["activity_type"] = normalize_activity_type(
                activity["activity_type"]
            )

        # Ensure the category field exists; an existing value is never replaced.
        if "category" in activity:
            continue
        # "Service" is the stand-in when the key is absent; it has no category
        # entry, so such activities end up as "General", same as a null type.
        activity_type = activity.get("activity_type", "Service")
        if activity_type is None:
            activity["category"] = "General"
        else:
            activity["category"] = infer_category_from_activity_type(activity_type)


def repair_farmer_settings(data):
    """Repair ``farmer_settings`` in place so the required fields exist.

    A missing or null ``farmer_settings`` becomes an empty object, then
    ``minimum_deployment_duration`` (24) and ``preferred_regions``
    (["NA", "EU"]) are added when absent. Existing values are kept.

    Args:
        data: The decoded user-data object (a dict); it is mutated.
    """
    if data.get("farmer_settings") is None:
        data["farmer_settings"] = {}

    farmer_settings = data["farmer_settings"]
    farmer_settings.setdefault("minimum_deployment_duration", 24)
    farmer_settings.setdefault("preferred_regions", ["NA", "EU"])


def ensure_required_fields(data):
    """Ensure all required top-level fields exist in *data* (in place).

    Only absent keys are filled in; keys that are present (even with a null
    value) are left as they are.

    Args:
        data: The decoded user-data object (a dict); it is mutated.
    """
    # Built on every call on purpose: the list/dict defaults are stored in
    # *data* directly, so sharing them would leak state between documents.
    required_fields = {
        "user_email": "unknown@example.com",
        "wallet_balance": "0.0",
        "transactions": [],
        "services": [],
        "service_requests": [],
        "apps": [],
        "app_deployments": [],
        "nodes": [],
        "farmer_earnings": [],
        "user_activities": [],
        "pool_positions": {},
    }
    for field, default_value in required_fields.items():
        data.setdefault(field, default_value)


def validate_and_repair_user_data(json_str):
    """Validate and repair a user-data JSON document.

    Args:
        json_str: The JSON text of one user-data file.

    Returns:
        The repaired document serialized with 2-space indentation and
        non-ASCII characters preserved.

    Raises:
        ValueError: If *json_str* is not valid JSON, or if its top-level
            value is not a JSON object.
    """
    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON: {e}") from e

    # Valid JSON can still be a list/string/number; the repair helpers need a
    # dict, so reject anything else with a clear message.
    if not isinstance(data, dict):
        raise ValueError(
            "Invalid user data: expected a JSON object at the top level, "
            f"got {type(data).__name__}"
        )

    repair_user_activities(data)
    repair_farmer_settings(data)
    ensure_required_fields(data)
    return json.dumps(data, indent=2, ensure_ascii=False)


def _write_text_atomically(path, text):
    """Replace *path* with *text* without ever leaving a half-written file."""
    tmp_path = path.with_name(path.name + ".tmp")
    try:
        tmp_path.write_text(text, encoding="utf-8")
        # Keep the permission bits of the file being replaced.
        shutil.copymode(path, tmp_path)
        os.replace(tmp_path, path)
    finally:
        # Only still present when something above failed.
        if tmp_path.exists():
            tmp_path.unlink()


def validate_all_user_files():
    """Validate and repair every ``*.json`` file in ``./user_data``.

    Each file is read, repaired and written back. A failure on one file is
    recorded and does not stop the remaining files from being processed;
    a file that fails is left untouched.

    Returns:
        One human-readable result line per file, starting with "✅" on
        success and "❌" on failure, in file-name order.

    Raises:
        FileNotFoundError: If the ``user_data`` directory does not exist.
    """
    user_data_dir = Path("user_data")
    if not user_data_dir.exists():
        raise FileNotFoundError("user_data directory not found")

    results = []
    # Sorted so the report order does not depend on the filesystem.
    for json_file in sorted(user_data_dir.glob("*.json")):
        filename = json_file.name
        try:
            content = json_file.read_text(encoding="utf-8")
            repaired_content = validate_and_repair_user_data(content)
            # Write back the repaired content (nothing to do if already normalized).
            if repaired_content != content:
                _write_text_atomically(json_file, repaired_content)
            results.append(f"✅ {filename}: Successfully validated and repaired")
        except Exception as e:
            # Per-file boundary: report the problem and keep going.
            results.append(f"❌ {filename}: {e}")
    return results


def main():
    """Run the validator over all user files and print a report.

    Returns:
        0 when every file was processed successfully, 1 otherwise.
    """
    print("🔧 ThreeFold Marketplace Data Validator")
    print("========================================")
    print()

    try:
        results = validate_all_user_files()
    except Exception as e:
        print(f"❌ Validation failed: {e}")
        return 1

    print("📊 Validation Results:")
    print()
    for result in results:
        print(f" {result}")
    print()

    success_count = sum(1 for r in results if r.startswith("✅"))
    error_count = sum(1 for r in results if r.startswith("❌"))

    print("📈 Summary:")
    print(f" ✅ Successfully processed: {success_count}")
    print(f" ❌ Errors encountered: {error_count}")
    print()

    if error_count == 0:
        print("🎉 All user data files are now valid and ready for use!")
        return 0

    print("⚠️ Some files had errors. Please review the output above.")
    return 1


if __name__ == "__main__":
    sys.exit(main())