//! Standalone data cleanup utility //! Run with: cargo run --bin cleanup use std::collections::HashMap; fn main() { // Initialize logging env_logger::init(); println!("๐Ÿงน Project Mycelium Data Cleanup Utility"); println!("=============================================="); // Manually clean up user1's duplicate nodes match cleanup_user1_data() { Ok(changes) => { println!("โœ… Cleanup completed successfully!"); if changes > 0 { println!("๐Ÿ“Š Changes made: {}", changes); } else { println!("๐Ÿ“Š No changes needed - data is already clean"); } } Err(e) => { println!("โŒ Cleanup failed: {}", e); std::process::exit(1); } } } fn cleanup_user1_data() -> Result { use serde_json::Value; let file_path = "./user_data/user1_at_example_com.json"; // Read the current data let data_str = std::fs::read_to_string(file_path) .map_err(|e| format!("Failed to read file: {}", e))?; let mut data: Value = serde_json::from_str(&data_str) .map_err(|e| format!("Failed to parse JSON: {}", e))?; // Get the nodes array let nodes = data.get_mut("nodes") .and_then(|n| n.as_array_mut()) .ok_or("No nodes array found")?; let original_count = nodes.len(); println!("๐Ÿ“Š Found {} nodes before cleanup", original_count); // Group nodes by grid_node_id let mut node_groups: HashMap> = HashMap::new(); for (index, node) in nodes.iter().enumerate() { if let Some(grid_id) = node.get("grid_node_id").and_then(|id| id.as_u64()) { node_groups.entry(grid_id as u32) .or_insert_with(Vec::new) .push((index, node.clone())); } } // Find and resolve duplicates let mut nodes_to_keep = Vec::new(); let mut duplicates_removed = 0; for (grid_id, mut group_nodes) in node_groups { if group_nodes.len() > 1 { println!("๐Ÿ” Found {} duplicate nodes for grid_node_id: {}", group_nodes.len(), grid_id); // Sort by quality: prefer nodes with slice combinations and marketplace SLA group_nodes.sort_by(|a, b| { let score_a = calculate_node_quality_score(&a.1); let score_b = calculate_node_quality_score(&b.1); score_b.partial_cmp(&score_a).unwrap_or(std::cmp::Ordering::Equal) }); // Keep the best node, merge data from others let mut best_node = group_nodes[0].1.clone(); // Merge slice data from other nodes if the best node is missing it for (_, other_node) in &group_nodes[1..] { if best_node.get("available_combinations") .and_then(|ac| ac.as_array()) .map_or(true, |arr| arr.is_empty()) { if let Some(other_combinations) = other_node.get("available_combinations") { if let Some(other_array) = other_combinations.as_array() { if !other_array.is_empty() { if let Some(best_obj) = best_node.as_object_mut() { best_obj.insert("available_combinations".to_string(), other_combinations.clone()); if let Some(total_slices) = other_node.get("total_base_slices") { best_obj.insert("total_base_slices".to_string(), total_slices.clone()); } if let Some(slice_calc) = other_node.get("slice_last_calculated") { best_obj.insert("slice_last_calculated".to_string(), slice_calc.clone()); } println!("๐Ÿ”„ Merged slice data from duplicate node"); } } } } } if best_node.get("marketplace_sla").is_none() { if let Some(other_sla) = other_node.get("marketplace_sla") { if let Some(best_obj) = best_node.as_object_mut() { best_obj.insert("marketplace_sla".to_string(), other_sla.clone()); println!("๐Ÿ”„ Merged marketplace SLA from duplicate node"); } } } if best_node.get("rental_options").is_none() { if let Some(other_rental) = other_node.get("rental_options") { if let Some(best_obj) = best_node.as_object_mut() { best_obj.insert("rental_options".to_string(), other_rental.clone()); println!("๐Ÿ”„ Merged rental options from duplicate node"); } } } } nodes_to_keep.push(best_node); duplicates_removed += group_nodes.len() - 1; println!("๐Ÿงน Removed {} duplicate nodes for grid_node_id: {}", group_nodes.len() - 1, grid_id); } else { // Single node, keep as is nodes_to_keep.push(group_nodes[0].1.clone()); } } // Update the data if let Some(data_obj) = data.as_object_mut() { data_obj.insert("nodes".to_string(), Value::Array(nodes_to_keep)); } // Write back to file let updated_data_str = serde_json::to_string_pretty(&data) .map_err(|e| format!("Failed to serialize JSON: {}", e))?; std::fs::write(file_path, updated_data_str) .map_err(|e| format!("Failed to write file: {}", e))?; let final_count = data.get("nodes") .and_then(|n| n.as_array()) .map_or(0, |arr| arr.len()); println!("๐Ÿ“Š Cleanup complete: {} -> {} nodes ({} duplicates removed)", original_count, final_count, duplicates_removed); Ok(duplicates_removed) } fn calculate_node_quality_score(node: &serde_json::Value) -> f32 { let mut score = 0.0; // Prefer nodes with slice combinations if let Some(combinations) = node.get("available_combinations").and_then(|ac| ac.as_array()) { if !combinations.is_empty() { score += 10.0; } } // Prefer nodes with marketplace SLA if node.get("marketplace_sla").is_some() { score += 5.0; } // Prefer nodes with rental options if node.get("rental_options").is_some() { score += 3.0; } // Prefer nodes with recent slice calculations if node.get("slice_last_calculated").is_some() { score += 2.0; } // Prefer nodes with grid data if node.get("grid_data").is_some() { score += 1.0; } // Prefer nodes with higher total base slices if let Some(total_slices) = node.get("total_base_slices").and_then(|ts| ts.as_u64()) { score += total_slices as f32 * 0.1; } score }