init projectmycelium

This commit is contained in:
mik-tf
2025-09-01 21:37:01 -04:00
commit b41efb0e99
319 changed files with 128160 additions and 0 deletions

190
src/bin/cleanup.rs Normal file
View File

@@ -0,0 +1,190 @@
//! Standalone data cleanup utility
//! Run with: cargo run --bin cleanup
use std::collections::HashMap;
/// Entry point for the standalone cleanup utility.
///
/// Initializes logging, runs the user1 duplicate-node cleanup, reports how
/// many changes were made, and exits with status 1 on failure.
fn main() {
    // Initialize logging
    env_logger::init();

    println!("🧹 Project Mycelium Data Cleanup Utility");
    println!("==============================================");

    // Manually clean up user1's duplicate nodes; bail out early on error.
    let changes = match cleanup_user1_data() {
        Ok(n) => n,
        Err(e) => {
            println!("❌ Cleanup failed: {}", e);
            std::process::exit(1);
        }
    };

    println!("✅ Cleanup completed successfully!");
    if changes == 0 {
        println!("📊 No changes needed - data is already clean");
    } else {
        println!("📊 Changes made: {}", changes);
    }
}
/// Deduplicates node entries (same `grid_node_id`) in user1's data file,
/// merging slice combinations, marketplace SLA, and rental options from the
/// discarded duplicates into the highest-quality surviving node.
///
/// Returns the number of duplicate nodes removed, or an error string if the
/// file cannot be read, parsed, or written back.
///
/// Fixes over the original grouping approach:
/// - Nodes that lack a `grid_node_id` field are preserved as-is instead of
///   being silently dropped from the file.
/// - Duplicate groups are processed in ascending `grid_node_id` order so the
///   rewritten file is deterministic (HashMap iteration order is not).
fn cleanup_user1_data() -> Result<usize, String> {
    use serde_json::Value;

    let file_path = "./user_data/user1_at_example_com.json";

    // Read the current data
    let data_str = std::fs::read_to_string(file_path)
        .map_err(|e| format!("Failed to read file: {}", e))?;
    let mut data: Value = serde_json::from_str(&data_str)
        .map_err(|e| format!("Failed to parse JSON: {}", e))?;

    // Get the nodes array
    let nodes = data.get_mut("nodes")
        .and_then(|n| n.as_array_mut())
        .ok_or("No nodes array found")?;

    let original_count = nodes.len();
    println!("📊 Found {} nodes before cleanup", original_count);

    // Group nodes by grid_node_id. Nodes without a numeric grid_node_id are
    // kept verbatim — the previous version dropped them from the file.
    let mut node_groups: HashMap<u32, Vec<(usize, Value)>> = HashMap::new();
    let mut nodes_to_keep = Vec::new();
    for (index, node) in nodes.iter().enumerate() {
        match node.get("grid_node_id").and_then(|id| id.as_u64()) {
            Some(grid_id) => {
                node_groups.entry(grid_id as u32)
                    .or_insert_with(Vec::new)
                    .push((index, node.clone()));
            }
            // BUGFIX: previously these nodes were silently lost.
            None => nodes_to_keep.push(node.clone()),
        }
    }

    // Find and resolve duplicates, visiting grid ids in sorted order so the
    // output is stable across runs.
    let mut duplicates_removed = 0;
    let mut grid_ids: Vec<u32> = node_groups.keys().copied().collect();
    grid_ids.sort_unstable();
    for grid_id in grid_ids {
        let mut group_nodes = node_groups
            .remove(&grid_id)
            .expect("grid_id came from node_groups.keys()");
        if group_nodes.len() > 1 {
            println!("🔍 Found {} duplicate nodes for grid_node_id: {}", group_nodes.len(), grid_id);

            // Sort by quality: prefer nodes with slice combinations and marketplace SLA
            group_nodes.sort_by(|a, b| {
                let score_a = calculate_node_quality_score(&a.1);
                let score_b = calculate_node_quality_score(&b.1);
                score_b.partial_cmp(&score_a).unwrap_or(std::cmp::Ordering::Equal)
            });

            // Keep the best node, merge data from others
            let mut best_node = group_nodes[0].1.clone();

            // Merge slice data from other nodes if the best node is missing it
            for (_, other_node) in &group_nodes[1..] {
                if best_node.get("available_combinations")
                    .and_then(|ac| ac.as_array())
                    .map_or(true, |arr| arr.is_empty()) {
                    if let Some(other_combinations) = other_node.get("available_combinations") {
                        if let Some(other_array) = other_combinations.as_array() {
                            if !other_array.is_empty() {
                                if let Some(best_obj) = best_node.as_object_mut() {
                                    best_obj.insert("available_combinations".to_string(), other_combinations.clone());
                                    if let Some(total_slices) = other_node.get("total_base_slices") {
                                        best_obj.insert("total_base_slices".to_string(), total_slices.clone());
                                    }
                                    if let Some(slice_calc) = other_node.get("slice_last_calculated") {
                                        best_obj.insert("slice_last_calculated".to_string(), slice_calc.clone());
                                    }
                                    println!("🔄 Merged slice data from duplicate node");
                                }
                            }
                        }
                    }
                }
                if best_node.get("marketplace_sla").is_none() {
                    if let Some(other_sla) = other_node.get("marketplace_sla") {
                        if let Some(best_obj) = best_node.as_object_mut() {
                            best_obj.insert("marketplace_sla".to_string(), other_sla.clone());
                            println!("🔄 Merged marketplace SLA from duplicate node");
                        }
                    }
                }
                if best_node.get("rental_options").is_none() {
                    if let Some(other_rental) = other_node.get("rental_options") {
                        if let Some(best_obj) = best_node.as_object_mut() {
                            best_obj.insert("rental_options".to_string(), other_rental.clone());
                            println!("🔄 Merged rental options from duplicate node");
                        }
                    }
                }
            }
            duplicates_removed += group_nodes.len() - 1;
            println!("🧹 Removed {} duplicate nodes for grid_node_id: {}", group_nodes.len() - 1, grid_id);
            nodes_to_keep.push(best_node);
        } else {
            // Single node, keep as is
            nodes_to_keep.push(group_nodes[0].1.clone());
        }
    }

    // Update the data
    if let Some(data_obj) = data.as_object_mut() {
        data_obj.insert("nodes".to_string(), Value::Array(nodes_to_keep));
    }

    // Write back to file
    let updated_data_str = serde_json::to_string_pretty(&data)
        .map_err(|e| format!("Failed to serialize JSON: {}", e))?;
    std::fs::write(file_path, updated_data_str)
        .map_err(|e| format!("Failed to write file: {}", e))?;

    let final_count = data.get("nodes")
        .and_then(|n| n.as_array())
        .map_or(0, |arr| arr.len());
    println!("📊 Cleanup complete: {} -> {} nodes ({} duplicates removed)",
        original_count, final_count, duplicates_removed);

    Ok(duplicates_removed)
}
/// Scores a node JSON object for duplicate resolution; higher is better.
///
/// Nodes carrying slice combinations, a marketplace SLA, rental options,
/// a recent slice calculation timestamp, or grid data outrank nodes without
/// them, and a larger `total_base_slices` count (0.1 per slice) breaks ties.
fn calculate_node_quality_score(node: &serde_json::Value) -> f32 {
    // Non-empty available_combinations is the strongest quality signal.
    let has_combinations = node
        .get("available_combinations")
        .and_then(|ac| ac.as_array())
        .map_or(false, |arr| !arr.is_empty());
    let mut score = if has_combinations { 10.0 } else { 0.0 };

    // Weighted presence checks, most important first.
    let presence_weights = [
        ("marketplace_sla", 5.0),
        ("rental_options", 3.0),
        ("slice_last_calculated", 2.0),
        ("grid_data", 1.0),
    ];
    for &(key, weight) in presence_weights.iter() {
        if node.get(key).is_some() {
            score += weight;
        }
    }

    // Tie-break on the declared number of base slices.
    if let Some(total_slices) = node.get("total_base_slices").and_then(|ts| ts.as_u64()) {
        score += total_slices as f32 * 0.1;
    }

    score
}