From f8436a726e51392fb4f2e48ea94c898c9797848a Mon Sep 17 00:00:00 2001 From: Maxime Van Hees Date: Tue, 9 Sep 2025 14:25:09 +0200 Subject: [PATCH] cleanup 2 --- packages/system/virt/src/cloudhv/mod.rs | 218 +++++++++++++++++- packages/system/virt/src/cloudhv/net/mod.rs | 30 ++- packages/system/virt/src/image_prep/mod.rs | 16 +- packages/system/virt/src/rhai/cloudhv.rs | 16 ++ .../virt/tests/rhai/vm_clean_launch.rhai | 24 +- 5 files changed, 273 insertions(+), 31 deletions(-) diff --git a/packages/system/virt/src/cloudhv/mod.rs b/packages/system/virt/src/cloudhv/mod.rs index ab9ba9f..d7db758 100644 --- a/packages/system/virt/src/cloudhv/mod.rs +++ b/packages/system/virt/src/cloudhv/mod.rs @@ -74,6 +74,15 @@ pub struct VmRuntime { pub status: String, /// Console log file path pub log_file: String, + /// Bridge name used for networking discovery (if applicable) + #[serde(default)] + pub bridge_name: Option, + /// dnsmasq lease file used (if applicable) + #[serde(default)] + pub lease_file: Option, + /// Stable MAC used for NIC injection (derived from VM id) + #[serde(default)] + pub mac: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -108,11 +117,88 @@ fn vm_dir(id: &str) -> PathBuf { fn vm_json_path(id: &str) -> PathBuf { vm_dir(id).join("vm.json") } +// Attempt to resolve a VM record across both the current user's HOME and root HOME. +// This handles cases where the VM was created/launched under sudo (HOME=/root). +fn resolve_vm_json_path(id: &str) -> Option { + let candidates = vec![ + hero_vm_root(), // $HOME/hero/virt/vms + Path::new("/root/hero/virt/vms").to_path_buf(), + ]; + for base in candidates { + let p = base.join(id).join("vm.json"); + if p.exists() { + return Some(p); + } + } + None +} fn vm_log_path(id: &str) -> PathBuf { vm_dir(id).join("logs/console.log") } +/// Attempt to resolve an API socket across both the current user's HOME and root HOME. +/// This handles cases where the VM was launched under sudo (HOME=/root). +fn resolve_vm_api_socket_path(id: &str) -> Option { + let candidates = vec![ + hero_vm_root(), // $HOME/hero/virt/vms + Path::new("/root/hero/virt/vms").to_path_buf(), + ]; + for base in candidates { + let p = base.join(id).join("api.sock"); + if p.exists() { + return Some(p); + } + } + None +} + +/// Query cloud-hypervisor for the first NIC's tap and mac via ch-remote-static. +/// Returns (tap_name, mac_lower) if successful. +fn ch_query_tap_mac(api_sock: &Path) -> Option<(String, String)> { + let cmd = format!( + "ch-remote-static --api-socket {} info", + shell_escape(&api_sock.to_string_lossy()) + ); + if let Ok(res) = sal_process::run(&cmd).silent(true).die(false).execute() { + if res.success { + if let Ok(v) = serde_json::from_str::(&res.stdout) { + if let Some(net0) = v + .get("config") + .and_then(|c| c.get("net")) + .and_then(|n| n.get(0)) + { + let tap = net0.get("tap").and_then(|t| t.as_str()).unwrap_or("").to_string(); + let mac = net0.get("mac").and_then(|m| m.as_str()).unwrap_or("").to_string(); + if !tap.is_empty() && !mac.is_empty() { + return Some((tap, mac.to_lowercase())); + } + } + } + } + } + None +} + +/// Infer the bridge name a tap device is attached to by parsing `ip -o link show ` output. +fn bridge_name_for_tap(tap: &str) -> Option { + let cmd = format!("ip -o link show {}", shell_escape(tap)); + if let Ok(res) = sal_process::run(&cmd).silent(true).die(false).execute() { + if res.success { + for line in res.stdout.lines() { + if let Some(idx) = line.find(" master ") { + let rest = &line[idx + " master ".len()..]; + let name = rest.split_whitespace().next().unwrap_or(""); + if !name.is_empty() { + return Some(name.to_string()); + } + } + } + } + } + None +} + fn vm_pid_path(id: &str) -> PathBuf { vm_dir(id).join("pid") } @@ -180,6 +266,23 @@ pub fn vm_create(spec: &VmSpec) -> Result { return Err(CloudHvError::InvalidSpec("memory_mb must be >= 128".into())); } + // If a VM with this id already exists, ensure it's not running to avoid clobber + resource conflicts + let json_path = vm_json_path(&spec.id); + if json_path.exists() { + if let Ok(value) = read_json(&json_path) { + if let Ok(existing) = serde_json::from_value::(value.clone()) { + if let Some(pid) = existing.runtime.pid { + if proc_exists(pid) { + return Err(CloudHvError::CommandFailed(format!( + "VM '{}' already exists and is running with pid {}. Stop or delete it first, or choose a different id.", + spec.id, pid + ))); + } + } + } + } + } + // Prepare directory layout let dir = vm_dir(&spec.id); sal_os::mkdir( @@ -190,17 +293,35 @@ pub fn vm_create(spec: &VmSpec) -> Result { let log_dir = dir.join("logs"); sal_os::mkdir(log_dir.to_str().unwrap()).map_err(|e| CloudHvError::IoError(e.to_string()))?; - // Persist initial record + // Build runtime (preserve prior metadata if present; will be refreshed on start) + let mut runtime = VmRuntime { + pid: None, + status: "stopped".into(), + log_file: vm_log_path(&spec.id).to_string_lossy().into_owned(), + bridge_name: None, + lease_file: None, + mac: None, + }; + if json_path.exists() { + if let Ok(value) = read_json(&json_path) { + if let Ok(existing) = serde_json::from_value::(value) { + if !existing.runtime.log_file.is_empty() { + runtime.log_file = existing.runtime.log_file; + } + runtime.bridge_name = existing.runtime.bridge_name; + runtime.lease_file = existing.runtime.lease_file; + runtime.mac = existing.runtime.mac; + } + } + } + + // Persist record (spec updated, runtime preserved/reset to stopped) let rec = VmRecord { spec: spec.clone(), - runtime: VmRuntime { - pid: None, - status: "stopped".into(), - log_file: vm_log_path(&spec.id).to_string_lossy().into_owned(), - }, + runtime, }; let value = serde_json::to_value(&rec).map_err(|e| CloudHvError::JsonError(e.to_string()))?; - write_json(&vm_json_path(&spec.id), &value)?; + write_json(&json_path, &value)?; Ok(spec.id.clone()) } @@ -563,6 +684,9 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> { rec.runtime.pid = pid; rec.runtime.status = if pid.is_some() { "running".into() } else { "stopped".into() }; rec.runtime.log_file = log_file; + rec.runtime.bridge_name = bridge_for_disc.clone(); + rec.runtime.lease_file = lease_for_disc.clone(); + rec.runtime.mac = Some(net::stable_mac_from_id(id)); rec.spec.api_socket = api_socket.clone(); let value = serde_json::to_value(&rec).map_err(|e| CloudHvError::JsonError(e.to_string()))?; @@ -584,17 +708,89 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> { Ok(()) } -/// Return VM record info (spec + runtime) by id +//// Return VM record info (spec + runtime) by id pub fn vm_info(id: &str) -> Result { - let p = vm_json_path(id); - if !p.exists() { + // Try current user's VM root first, then fall back to /root (common when VM was launched under sudo) + let p_user = vm_json_path(id); + let p = if p_user.exists() { + p_user + } else if let Some(p2) = resolve_vm_json_path(id) { + p2 + } else { return Err(CloudHvError::NotFound(format!("VM '{}' not found", id))); - } + }; let value = read_json(&p)?; let rec: VmRecord = serde_json::from_value(value).map_err(|e| CloudHvError::JsonError(e.to_string()))?; Ok(rec) } +//// Discover VM network info using persisted metadata (bridge/lease/mac) with sensible fallbacks. +/// Returns (IPv4, IPv6, MAC, BridgeName, LeaseFile), each optional. +pub fn vm_network_info( + id: &str, + timeout_secs: u64, +) -> Result<(Option, Option, Option, Option, Option), CloudHvError> { + let rec = vm_info(id)?; + + // Start with persisted/env/default values + let mut bridge_name = rec + .runtime + .bridge_name + .clone() + .or_else(|| std::env::var("HERO_VIRT_BRIDGE_NAME").ok()) + .unwrap_or_else(|| "br-hero".into()); + + // MAC: persisted or deterministically derived (lowercased for matching) + let mut mac_lower = rec + .runtime + .mac + .clone() + .unwrap_or_else(|| net::stable_mac_from_id(id)) + .to_lowercase(); + + // Attempt to query CH for ground-truth (tap, mac) if API socket is available + if let Some(api_sock) = resolve_vm_api_socket_path(id) { + if let Some((tap, mac_from_ch)) = ch_query_tap_mac(&api_sock) { + mac_lower = mac_from_ch; + if let Some(br) = bridge_name_for_tap(&tap) { + bridge_name = br; + } + } + } + + // Lease file: persisted -> env -> derived from (possibly overridden) bridge + let lease_path = rec + .runtime + .lease_file + .clone() + .or_else(|| std::env::var("HERO_VIRT_DHCP_LEASE_FILE").ok()) + .unwrap_or_else(|| format!("/var/lib/misc/dnsmasq-hero-{}.leases", bridge_name)); + + // Discover addresses + let ipv4 = net::discover_ipv4_from_leases(&lease_path, &mac_lower, timeout_secs); + let ipv6 = { + use std::time::{Duration, Instant}; + let deadline = Instant::now() + Duration::from_secs(timeout_secs); + let mut v6: Option = None; + while Instant::now() < deadline { + if let Some(ip) = net::discover_ipv6_on_bridge(&bridge_name, &mac_lower) { + v6 = Some(ip); + break; + } + std::thread::sleep(Duration::from_millis(800)); + } + v6 + }; + + Ok(( + ipv4, + ipv6, + Some(mac_lower), + Some(bridge_name), + Some(lease_path), + )) +} + /// Stop a VM via ch-remote (graceful), optionally force kill pub fn vm_stop(id: &str, force: bool) -> Result<(), CloudHvError> { ensure_deps().ok(); // best-effort; we might still force-kill diff --git a/packages/system/virt/src/cloudhv/net/mod.rs b/packages/system/virt/src/cloudhv/net/mod.rs index a0705b1..e47c90f 100644 --- a/packages/system/virt/src/cloudhv/net/mod.rs +++ b/packages/system/virt/src/cloudhv/net/mod.rs @@ -189,8 +189,7 @@ if [ -n \"$IPV6_CIDR\" ]; then printf '%s\\n' \ \"enable-ra\" \ \"dhcp-range=$BRIDGE_PREFIX,ra-names,12h\" \ - \"dhcp-option=option6:dns-server,[2001:4860:4860::8888]\" \ - \"dhcp-option=option6:route,3600,400::/7,[$BRIDGE_ADDR]\" >>\"$TMP\" + \"dhcp-option=option6:dns-server,[2001:4860:4860::8888]\" >>\"$TMP\" fi if [ ! -f \"$CFG\" ] || ! cmp -s \"$CFG\" \"$TMP\"; then @@ -246,7 +245,15 @@ TAP={tap} UIDX=$(id -u) GIDX=$(id -g) -ip link show \"$TAP\" >/dev/null 2>&1 || ip tuntap add dev \"$TAP\" mode tap user \"$UIDX\" group \"$GIDX\" +# Ensure a clean TAP state to avoid Resource busy if a previous VM run left it lingering +if ip link show \"$TAP\" >/dev/null 2>&1; then + ip link set \"$TAP\" down || true + ip link set \"$TAP\" nomaster 2>/dev/null || true + ip tuntap del dev \"$TAP\" mode tap 2>/dev/null || true +fi + +# Recreate with correct ownership and attach to bridge +ip tuntap add dev \"$TAP\" mode tap user \"$UIDX\" group \"$GIDX\" ip link set \"$TAP\" master \"$BR\" 2>/dev/null || true ip link set \"$TAP\" up ", @@ -258,13 +265,18 @@ ip link set \"$TAP\" up } /// Stable locally-administered unicast MAC derived from VM id. +/// IMPORTANT: Use a deterministic hash (FNV-1a) rather than DefaultHasher (which is randomized). pub fn stable_mac_from_id(id: &str) -> String { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - let mut h = DefaultHasher::new(); - id.hash(&mut h); - let v = h.finish(); - let b0 = (((v >> 40) & 0xff) as u8 & 0xfe) | 0x02; // locally administered, unicast + // 64-bit FNV-1a + const FNV_OFFSET: u64 = 0xcbf29ce484222325; + const FNV_PRIME: u64 = 0x00000100000001B3; + let mut v: u64 = FNV_OFFSET; + for b in id.as_bytes() { + v ^= *b as u64; + v = v.wrapping_mul(FNV_PRIME); + } + // Locally administered, unicast + let b0 = (((v >> 40) & 0xff) as u8 & 0xfe) | 0x02; let b1 = ((v >> 32) & 0xff) as u8; let b2 = ((v >> 24) & 0xff) as u8; let b3 = ((v >> 16) & 0xff) as u8; diff --git a/packages/system/virt/src/image_prep/mod.rs b/packages/system/virt/src/image_prep/mod.rs index 3801c27..9568d6a 100644 --- a/packages/system/virt/src/image_prep/mod.rs +++ b/packages/system/virt/src/image_prep/mod.rs @@ -83,19 +83,21 @@ fn default_disable_cloud_init_net() -> bool { } fn stable_mac_from_id(id: &str) -> String { - let mut h = DefaultHasher::new(); - id.hash(&mut h); - let v = h.finish(); + // Use deterministic FNV-1a (matches host-side MAC derivation used by CH builder) + const FNV_OFFSET: u64 = 0xcbf29ce484222325; + const FNV_PRIME: u64 = 0x00000100000001B3; + let mut v: u64 = FNV_OFFSET; + for b in id.as_bytes() { + v ^= *b as u64; + v = v.wrapping_mul(FNV_PRIME); + } let b0 = (((v >> 40) & 0xff) as u8 & 0xfe) | 0x02; // locally administered, unicast let b1 = ((v >> 32) & 0xff) as u8; let b2 = ((v >> 24) & 0xff) as u8; let b3 = ((v >> 16) & 0xff) as u8; let b4 = ((v >> 8) & 0xff) as u8; let b5 = (v & 0xff) as u8; - format!( - "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", - b0, b1, b2, b3, b4, b5 - ) + format!("{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", b0, b1, b2, b3, b4, b5) } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/packages/system/virt/src/rhai/cloudhv.rs b/packages/system/virt/src/rhai/cloudhv.rs index c1a614a..0075bc3 100644 --- a/packages/system/virt/src/rhai/cloudhv.rs +++ b/packages/system/virt/src/rhai/cloudhv.rs @@ -230,6 +230,21 @@ pub fn cloudhv_display_network_info(vm_id: &str, ipv4: Dynamic, ipv6: Dynamic) { println!(" cloudhv_vm_delete(\"{}\", true);", vm_id); } +/// High-level network discovery that avoids hardcoded MAC/paths. +/// Returns a Rhai map with fields: ipv4, ipv6, mac, bridge, lease. +pub fn cloudhv_vm_network_info(id: &str, timeout_secs: i64) -> Result> { + let (ipv4, ipv6, mac, bridge, lease) = + hv_to_rhai(cloudhv::vm_network_info(id, timeout_secs as u64))?; + let mut m = Map::new(); + m.insert("vm_id".into(), id.to_string().into()); + m.insert("ipv4".into(), ipv4.map(Into::into).unwrap_or(Dynamic::UNIT)); + m.insert("ipv6".into(), ipv6.map(Into::into).unwrap_or(Dynamic::UNIT)); + m.insert("mac".into(), mac.map(Into::into).unwrap_or(Dynamic::UNIT)); + m.insert("bridge".into(), bridge.map(Into::into).unwrap_or(Dynamic::UNIT)); + m.insert("lease".into(), lease.map(Into::into).unwrap_or(Dynamic::UNIT)); + Ok(m) +} + // Module registration pub fn register_cloudhv_module(engine: &mut Engine) -> Result<(), Box> { @@ -239,6 +254,7 @@ pub fn register_cloudhv_module(engine: &mut Engine) -> Result<(), Box