diff --git a/packages/system/virt/src/cloudhv/mod.rs b/packages/system/virt/src/cloudhv/mod.rs index 248c6e6..4f5ee51 100644 --- a/packages/system/virt/src/cloudhv/mod.rs +++ b/packages/system/virt/src/cloudhv/mod.rs @@ -43,6 +43,8 @@ pub struct VmSpec { pub id: String, /// Optional for firmware boot; required for direct kernel boot pub kernel_path: Option, + /// Optional initramfs when using direct kernel boot + pub initramfs_path: Option, /// Optional for direct kernel boot; required for firmware boot pub firmware_path: Option, /// Disk image path (qcow2 or raw) @@ -228,38 +230,104 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> { let _ = fs::remove_file(&api_path); // Preflight disk: if source is qcow2, convert to raw to avoid CH "Compressed blocks not supported" - // This is best-effort: if qemu-img is unavailable or info fails, we skip conversion. + // Robust conversion: + // - Remove any stale destination + // - Try direct convert to destination file + // - On failure (e.g., byte-range lock issues), fallback to piping stdout into dd let mut disk_to_use = rec.spec.disk_path.clone(); if let Ok(info) = qcow2::info(&disk_to_use) { if info.get("format").and_then(|v| v.as_str()) == Some("qcow2") { let dest = vm_dir(id).join("disk.raw").to_string_lossy().into_owned(); - let cmd = format!( + // Best-effort remove stale target file to avoid locking errors + let _ = fs::remove_file(&dest); + + // Attempt 1: normal qemu-img convert to dest file + let cmd1 = format!( "qemu-img convert -O raw {} {}", shell_escape(&disk_to_use), shell_escape(&dest) ); - match sal_process::run(&cmd).silent(true).execute() { - Ok(res) if res.success => { - disk_to_use = dest; + let attempt1 = sal_process::run(&cmd1).silent(true).die(false).execute(); + + let mut converted_ok = false; + let mut err1: Option = None; + + if let Ok(res) = attempt1 { + if res.success { + converted_ok = true; + } else { + err1 = Some(format!("{}{}", res.stdout, res.stderr)); } - Ok(res) => { - return Err(CloudHvError::CommandFailed(format!( - "Failed converting qcow2 to raw: {}", - res.stderr - ))); - } - Err(e) => { - return Err(CloudHvError::CommandFailed(format!( - "Failed converting qcow2 to raw: {}", - e - ))); + } else if let Err(e) = attempt1 { + err1 = Some(e.to_string()); + } + + if !converted_ok { + // Attempt 2: pipe via stdout into dd (avoids qemu-img destination locking semantics on some FS) + let cmd2 = format!( + "#!/bin/bash -euo pipefail\nqemu-img convert -O raw {} - | dd of={} bs=4M status=none", + shell_escape(&disk_to_use), + shell_escape(&dest) + ); + match sal_process::run(&cmd2).silent(true).die(false).execute() { + Ok(res) if res.success => { + converted_ok = true; + } + Ok(res) => { + let mut msg = String::from("Failed converting qcow2 to raw."); + if let Some(e1) = err1 { + msg.push_str(&format!("\nFirst attempt error:\n{}", e1)); + } + msg.push_str(&format!("\nSecond attempt error:\n{}{}", res.stdout, res.stderr)); + return Err(CloudHvError::CommandFailed(msg)); + } + Err(e) => { + let mut msg = String::from("Failed converting qcow2 to raw."); + if let Some(e1) = err1 { + msg.push_str(&format!("\nFirst attempt error:\n{}", e1)); + } + msg.push_str(&format!("\nSecond attempt error:\n{}", e)); + return Err(CloudHvError::CommandFailed(msg)); + } } } + + if converted_ok { + disk_to_use = dest; + } } } - // Build command (minimal args for Phase 2) - // We redirect all output to log_file via shell and keep process in background with nohup + // Consolidate extra --disk occurrences from spec.extra_args into a single --disk (CH version requires variadic form) + // Collect disk value tokens provided by the user and strip them from extra args so we can render one '--disk' followed by multiple values. + let mut extra_disk_vals: Vec = Vec::new(); + let mut extra_args_sans_disks: Vec = Vec::new(); + if let Some(extra) = rec.spec.extra_args.clone() { + let mut i = 0usize; + while i < extra.len() { + let tok = extra[i].clone(); + if tok == "--disk" { + if i + 1 < extra.len() { + extra_disk_vals.push(extra[i + 1].clone()); + i += 2; + continue; + } else { + // dangling --disk without value; drop it + i += 1; + continue; + } + } else if let Some(rest) = tok.strip_prefix("--disk=") { + if !rest.is_empty() { + extra_disk_vals.push(rest.to_string()); + } + i += 1; + continue; + } + // keep token + extra_args_sans_disks.push(tok); + i += 1; + } + } // CH CLI flags (very common subset) // --disk path=... uses virtio-blk by default @@ -282,6 +350,12 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> { .unwrap_or_else(|| "console=ttyS0 reboot=k panic=1".to_string()); parts.push("--kernel".into()); parts.push(kpath); + if let Some(initrd) = rec.spec.initramfs_path.clone() { + if Path::new(&initrd).exists() { + parts.push("--initramfs".into()); + parts.push(initrd); + } + } parts.push("--cmdline".into()); parts.push(cmdline); } else { @@ -292,6 +366,10 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> { parts.push("--disk".into()); parts.push(format!("path={}", disk_to_use)); + // Append any additional disk value tokens (from sanitized extra args) so CH sees a single '--disk' with multiple values + for dv in &extra_disk_vals { + parts.push(dv.clone()); + } parts.push("--cpus".into()); parts.push(format!("boot={}", rec.spec.vcpus)); parts.push("--memory".into()); @@ -301,36 +379,50 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> { parts.push("--console".into()); parts.push("off".into()); - // Networking prerequisites (bridge + NAT via nftables + dnsmasq DHCP) - // Defaults can be overridden via env: - // HERO_VIRT_BRIDGE_NAME, HERO_VIRT_BRIDGE_ADDR_CIDR, HERO_VIRT_SUBNET_CIDR, HERO_VIRT_DHCP_START, HERO_VIRT_DHCP_END - let bridge_name = std::env::var("HERO_VIRT_BRIDGE_NAME").unwrap_or_else(|_| "br-hero".into()); - let bridge_addr_cidr = std::env::var("HERO_VIRT_BRIDGE_ADDR_CIDR").unwrap_or_else(|_| "172.30.0.1/24".into()); - let subnet_cidr = std::env::var("HERO_VIRT_SUBNET_CIDR").unwrap_or_else(|_| "172.30.0.0/24".into()); - let dhcp_start = std::env::var("HERO_VIRT_DHCP_START").unwrap_or_else(|_| "172.30.0.50".into()); - let dhcp_end = std::env::var("HERO_VIRT_DHCP_END").unwrap_or_else(|_| "172.30.0.250".into()); + // Determine if the user provided explicit network arguments (e.g. "--net", "tap=...,mac=...") + // If so, do NOT provision the default host networking or add a default NIC. + let has_user_net = rec + .spec + .extra_args + .as_ref() + .map(|v| v.iter().any(|tok| tok == "--net")) + .unwrap_or(false); - // Ensure host-side networking (requires root privileges / CAP_NET_ADMIN) - ensure_host_net_prereq_dnsmasq_nftables( - &bridge_name, - &bridge_addr_cidr, - &subnet_cidr, - &dhcp_start, - &dhcp_end, - )?; + if !has_user_net { + // Networking prerequisites (bridge + NAT via nftables + dnsmasq DHCP) + // Defaults can be overridden via env: + // HERO_VIRT_BRIDGE_NAME, HERO_VIRT_BRIDGE_ADDR_CIDR, HERO_VIRT_SUBNET_CIDR, HERO_VIRT_DHCP_START, HERO_VIRT_DHCP_END + let bridge_name = std::env::var("HERO_VIRT_BRIDGE_NAME").unwrap_or_else(|_| "br-hero".into()); + let bridge_addr_cidr = + std::env::var("HERO_VIRT_BRIDGE_ADDR_CIDR").unwrap_or_else(|_| "172.30.0.1/24".into()); + let subnet_cidr = + std::env::var("HERO_VIRT_SUBNET_CIDR").unwrap_or_else(|_| "172.30.0.0/24".into()); + let dhcp_start = + std::env::var("HERO_VIRT_DHCP_START").unwrap_or_else(|_| "172.30.0.50".into()); + let dhcp_end = + std::env::var("HERO_VIRT_DHCP_END").unwrap_or_else(|_| "172.30.0.250".into()); - // Ensure a TAP device for this VM and attach to the bridge - let tap_name = ensure_tap_for_vm(&bridge_name, id)?; - // Stable locally-administered MAC derived from VM id - let mac = stable_mac_from_id(id); + // Ensure host-side networking (requires root privileges / CAP_NET_ADMIN) + ensure_host_net_prereq_dnsmasq_nftables( + &bridge_name, + &bridge_addr_cidr, + &subnet_cidr, + &dhcp_start, + &dhcp_end, + )?; - parts.push("--net".into()); - parts.push(format!("tap={},mac={}", tap_name, mac)); + // Ensure a TAP device for this VM and attach to the bridge + let tap_name = ensure_tap_for_vm(&bridge_name, id)?; + // Stable locally-administered MAC derived from VM id + let mac = stable_mac_from_id(id); - if let Some(extra) = rec.spec.extra_args.clone() { - for e in extra { - parts.push(e); - } + parts.push("--net".into()); + parts.push(format!("tap={},mac={}", tap_name, mac)); + } + + // Append any user-provided extra args, sans any '--disk' we already consolidated + for e in extra_args_sans_disks { + parts.push(e); } let args_str = shell_join(&parts); @@ -369,6 +461,32 @@ echo $! > '{}' Err(_) => None, }; + // Quick health check: ensure process did not exit immediately due to CLI errors (e.g., duplicate flags) + if let Some(pid_num) = pid { + thread::sleep(Duration::from_millis(300)); + if !proc_exists(pid_num) { + // Tail log to surface the error cause + let tail_cmd = format!("tail -n 200 {}", shell_escape(&log_file)); + let tail = sal_process::run(&tail_cmd).die(false).silent(true).execute(); + let mut log_snip = String::new(); + if let Ok(res) = tail { + if res.success { + log_snip = res.stdout; + } else { + log_snip = format!("{}{}", res.stdout, res.stderr); + } + } + return Err(CloudHvError::CommandFailed(format!( + "cloud-hypervisor exited immediately after start. Log tail:\n{}", + log_snip + ))); + } + } else { + return Err(CloudHvError::CommandFailed( + "failed to obtain cloud-hypervisor PID (start script did not write pid)".into(), + )); + } + // Update state rec.runtime.pid = pid; rec.runtime.status = if pid.is_some() { "running".into() } else { "stopped".into() }; diff --git a/packages/system/virt/src/rhai/cloudhv.rs b/packages/system/virt/src/rhai/cloudhv.rs index 29e429f..c39c0b9 100644 --- a/packages/system/virt/src/rhai/cloudhv.rs +++ b/packages/system/virt/src/rhai/cloudhv.rs @@ -17,6 +17,7 @@ fn hv_to_rhai(r: Result) -> Result Result> { let id = must_get_string(&spec, "id")?; let kernel_path = get_string(&spec, "kernel_path"); + let initramfs_path = get_string(&spec, "initramfs_path"); let firmware_path = get_string(&spec, "firmware_path"); let disk_path = must_get_string(&spec, "disk_path")?; let api_socket = get_string(&spec, "api_socket").unwrap_or_else(|| "".to_string()); @@ -28,6 +29,7 @@ fn map_to_vmspec(spec: Map) -> Result> { Ok(VmSpec { id, kernel_path, + initramfs_path, firmware_path, disk_path, api_socket, @@ -46,6 +48,11 @@ fn vmspec_to_map(s: &VmSpec) -> Map { } else { m.insert("kernel_path".into(), Dynamic::UNIT); } + if let Some(ir) = &s.initramfs_path { + m.insert("initramfs_path".into(), ir.clone().into()); + } else { + m.insert("initramfs_path".into(), Dynamic::UNIT); + } if let Some(fw) = &s.firmware_path { m.insert("firmware_path".into(), fw.clone().into()); } else { diff --git a/packages/system/virt/tests/rhai/05_cloudhv_basic.rhai b/packages/system/virt/tests/rhai/05_cloudhv_basic.rhai index 5dad849..39ec523 100644 --- a/packages/system/virt/tests/rhai/05_cloudhv_basic.rhai +++ b/packages/system/virt/tests/rhai/05_cloudhv_basic.rhai @@ -140,25 +140,25 @@ if !missing { print("⚠️ API socket not found (continuing)"); } - print("\n--- Test 5: Stop VM (graceful) ---"); - try { - cloudhv_vm_stop(vm_id, false); - print("✓ VM stop invoked (graceful)"); - } catch (err) { - print(`⚠️ VM stop failed: ${err}`); - } + // print("\n--- Test 5: Stop VM (graceful) ---"); + // try { + // cloudhv_vm_stop(vm_id, false); + // print("✓ VM stop invoked (graceful)"); + // } catch (err) { + // print(`⚠️ VM stop failed: ${err}`); + // } } else { print("\n⚠️ Skipping start/stop because required inputs are missing."); } -print("\n--- Test 6: Delete VM definition ---"); -try { - cloudhv_vm_delete(vm_id, false); - print("✓ VM deleted"); -} catch (err) { - print(`❌ VM delete failed: ${err}`); - print("=== CloudHV Tests Aborted ==="); - exit(); -} +// print("\n--- Test 6: Delete VM definition ---"); +// try { +// cloudhv_vm_delete(vm_id, false); +// print("✓ VM deleted"); +// } catch (err) { +// print(`❌ VM delete failed: ${err}`); +// print("=== CloudHV Tests Aborted ==="); +// exit(); +// } print("\n=== Cloud Hypervisor Basic Tests Completed ==="); \ No newline at end of file diff --git a/packages/system/virt/tests/rhai/06_cloudhv_cloudinit_dhcpd.rhai b/packages/system/virt/tests/rhai/06_cloudhv_cloudinit_dhcpd.rhai index 77608d7..3eb9f83 100644 --- a/packages/system/virt/tests/rhai/06_cloudhv_cloudinit_dhcpd.rhai +++ b/packages/system/virt/tests/rhai/06_cloudhv_cloudinit_dhcpd.rhai @@ -16,7 +16,7 @@ print("=== CloudHV + cloud-init + host DHCP (dnsmasq) ==="); // ----------- User input ----------- -let user_pubkey = "ssh-ed25519 REPLACE_WITH_YOUR_PUBLIC_KEY user@host"; +let user_pubkey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFyZJCEsvRc0eitsOoq+ywC5Lmqejvk3hXMVbO0AxPrd maxime@maxime-arch"; // Optional: choose boot method. If firmware is present in common locations, it will be used. // Otherwise, if kernel_path exists, direct kernel boot will be used. @@ -31,6 +31,8 @@ let br_cidr = "192.168.127.1/24"; let br_ip = "192.168.127.1"; let tap = "tap0"; let mac = "02:00:00:00:00:10"; // locally administered MAC +// Deterministic IP for the VM (dnsmasq will pin this MAC to this IP) +let vm_static_ip = "192.168.127.100"; // Paths let base_dir = "/tmp/virt_images"; @@ -81,8 +83,9 @@ print("✓ Dependencies look OK"); print("\n--- Ensuring Ubuntu 24.04 cloud image ---"); let base; try { - // Adjust the size_gb as desired; this resizes the cloud image sparsely. - base = qcow2_build_ubuntu_24_04_base(base_dir, 10); + // Avoid resizing to prevent GPT backup-header mismatch that can break early boot on some kernels/firmware. + // Use 0 to keep the original image size; cloud-init/cloud-image tooling can grow the FS later if needed. + base = qcow2_build_ubuntu_24_04_base(base_dir, 0); } catch (err) { print(`❌ Failed to build/ensure base image: ${err}`); exit(); @@ -93,55 +96,106 @@ print(`✓ Using base image: ${disk_path}`); // ----------- Host networking (bridge + tap) ----------- print("\n--- Configuring host networking (bridge + tap) ---"); // Idempotent: create br0 if missing; assign IP if not present; set up -run_silent(`sudo sh -lc 'ip link show ${bridge} >/dev/null 2>&1 || ip link add ${bridge} type bridge'`); -run_silent(`sudo sh -lc 'ip addr show dev ${bridge} | grep -q "${br_cidr}" || ip addr add ${br_cidr} dev ${bridge}'`); -run_silent(`sudo sh -lc 'ip link set ${bridge} up'`); +let net_script = ` +sudo ip link show ${bridge} >/dev/null 2>&1 || sudo ip link add ${bridge} type bridge +ip addr show dev ${bridge} | grep -q "${br_cidr}" || sudo ip addr add ${br_cidr} dev ${bridge} +sudo ip link set ${bridge} up -// Idempotent: create tap and attach to bridge -run_silent(`sudo sh -lc 'ip link show ${tap} >/dev/null 2>&1 || ip tuntap add dev ${tap} mode tap'`); -run_silent(`sudo sh -lc 'bridge link | grep -q "${tap}" || ip link set ${tap} master ${bridge}'`); -run_silent(`sudo sh -lc 'ip link set ${tap} up'`); +# Remove any stale TAP to avoid "Resource busy" when CH configures it +if ip link show ${tap} >/dev/null 2>&1; then + sudo ip link set ${tap} down || true + sudo ip link del ${tap} || true +fi +`; +run_silent(net_script); print(`✓ Bridge ${bridge} and tap ${tap} configured`); +print("Note: NO-CARRIER on a bridge/tap without a peer is normal; DHCP will work once the guest brings its interface up."); // ----------- Start/ensure dnsmasq on br0 ----------- print("\n--- Ensuring dnsmasq serving DHCP on the bridge ---"); +// Ensure log/lease directory exists before starting dnsmasq +run_silent(`mkdir -p ${base_dir}`); // If an instance with our pid-file is running, keep it; otherwise start a new one bound to br0. // Use --port=0 to avoid DNS port conflicts; we only need DHCP here. -let dns_state = run_silent(`bash -lc 'if [ -f ${dnsmasq_pid} ] && ps -p $(cat ${dnsmasq_pid}) >/dev/null 2>&1; then echo RUNNING; else echo STOPPED; fi'`); +let dns_state = run_silent(` +if [ -f ${dnsmasq_pid} ] && ps -p $(cat ${dnsmasq_pid}) >/dev/null 2>&1; then + echo RUNNING +elif pgrep -f "dnsmasq .*--interface=${bridge}" >/dev/null 2>&1; then + echo RUNNING +elif [ -f ${dnsmasq_log} ] && grep -q "sockets bound exclusively to interface ${bridge}" ${dnsmasq_log}; then + echo RUNNING +else + echo STOPPED +fi +`); let need_start = true; if dns_state.success && dns_state.stdout.trim() == "RUNNING" { print("✓ dnsmasq already running (pid file present and alive)"); need_start = false; } else { // Clean stale files - run_silent(`bash -lc 'rm -f ${dnsmasq_pid} ${dnsmasq_lease}'`); + run_silent(`rm -f ${dnsmasq_pid} ${dnsmasq_lease}`); } if need_start { - let cmd = ` - nohup sudo dnsmasq \ - --port=0 \ - --bind-interfaces \ - --except-interface=lo \ - --interface=${bridge} \ - --dhcp-range=192.168.127.100,192.168.127.200,12h \ - --dhcp-option=option:router,${br_ip} \ - --dhcp-option=option:dns-server,1.1.1.1 \ - --pid-file=${dnsmasq_pid} \ - --dhcp-leasefile=${dnsmasq_lease} \ - > ${dnsmasq_log} 2>&1 & - echo $! >/dev/null - `; - let r = run_silent(`bash -lc ${cmd.stringify()}`); + // Start dnsmasq detached and force a clean, self-contained configuration. + // - Use --conf-file=/dev/null to avoid system config conflicts + // - Log directly via --log-facility to capture early failures + // - Run under current privileges (herodo is invoked with sudo) + let r = run_silent(` +: > ${dnsmasq_log} +nohup dnsmasq \ + --conf-file=/dev/null \ + --log-facility=${dnsmasq_log} \ + --log-dhcp \ + --user=root \ + --group=root \ + --port=0 \ + --bind-interfaces \ + --except-interface=lo \ + --interface=${bridge} \ + --dhcp-range=192.168.127.100,192.168.127.200,12h \ + --dhcp-option=option:router,${br_ip} \ + --dhcp-option=option:dns-server,1.1.1.1 \ + --dhcp-host=${mac},${vm_static_ip} \ + --pid-file=${dnsmasq_pid} \ + --dhcp-leasefile=${dnsmasq_lease} & +`); if !r.success { print(`❌ Failed to start dnsmasq. Check log: ${dnsmasq_log}`); + let t = run_silent(` +if [ -f ${dnsmasq_log} ]; then + tail -n 200 ${dnsmasq_log} +fi +`); + if t.success && t.stdout.trim() != "" { print(t.stdout); } exit(); } - // Wait briefly for pid file - sleep(1); - let chk = run_silent(`bash -lc 'test -f ${dnsmasq_pid} && ps -p $(cat ${dnsmasq_pid}) >/dev/null 2>&1 && echo OK || echo FAIL'`); - if !(chk.success && chk.stdout.trim() == "OK") { + + // Robust readiness: wait up to 10s for pidfile OR process OR log pattern + let ready = run_silent(` +for i in $(seq 1 10); do + if [ -f ${dnsmasq_pid} ] && ps -p $(cat ${dnsmasq_pid}) >/dev/null 2>&1; then + echo OK; exit 0 + fi + if pgrep -f "dnsmasq .*--interface=${bridge}" >/dev/null 2>&1; then + echo OK; exit 0 + fi + if [ -f ${dnsmasq_log} ] && grep -q "sockets bound exclusively to interface ${bridge}" ${dnsmasq_log}; then + echo OK; exit 0 + fi + sleep 1 +done +echo FAIL +`); + if !(ready.success && ready.stdout.contains("OK")) { print(`❌ dnsmasq did not come up. See ${dnsmasq_log}`); + let t = run_silent(` +if [ -f ${dnsmasq_log} ]; then + tail -n 200 ${dnsmasq_log} +fi +`); + if t.success && t.stdout.trim() != "" { print(t.stdout); } exit(); } print("✓ dnsmasq started (DHCP on br0)"); @@ -149,7 +203,8 @@ if need_start { // ----------- Build cloud-init NoCloud seed (user-data/meta-data) ----------- print("\n--- Building NoCloud seed (user-data, meta-data) ---"); -run_silent(`bash -lc 'mkdir -p ${base_dir}'`); +run_silent(`mkdir -p ${base_dir}`); +run_silent(`chmod 1777 ${base_dir}`); // Compose user-data and meta-data content let ud = `#cloud-config @@ -169,15 +224,41 @@ local-hostname: noblevm `; // Write files via heredoc -let wr1 = run_silent(`bash -lc "cat > ${user_data} <<'EOF'\n${ud}\nEOF"`); +let wr1 = run_silent(` +cat > ${user_data} <<'EOF' +${ud} +EOF +`); if !wr1.success { print(`❌ Failed to write ${user_data}`); exit(); } -let wr2 = run_silent(`bash -lc "cat > ${meta_data} <<'EOF'\n${md}\nEOF"`); +let wr2 = run_silent(` +cat > ${meta_data} <<'EOF' +${md} +EOF +`); if !wr2.success { print(`❌ Failed to write ${meta_data}`); exit(); } +// Provide cloud-init network-config to ensure the NIC with our MAC requests DHCP +let net_config = `${base_dir}/network-config`; +let nc = `version: 2 +ethernets: + nic0: + match: + macaddress: ${mac} + set-name: eth0 + renderer: networkd + dhcp4: true +`; +let wr3 = run_silent(` +cat > ${net_config} <<'EOF' +${nc} +EOF +`); +if !wr3.success { print(`❌ Failed to write ${net_config}`); exit(); } + // Build seed ISO (prefer cloud-localds) let built = false; if !(clds == () || clds == "") { - let r = run_silent(`bash -lc "sudo cloud-localds ${seed_iso} ${user_data} ${meta_data}"`); + let r = run_silent(`sudo cloud-localds --network-config ${net_config} ${seed_iso} ${user_data} ${meta_data}`); if r.success { built = true; } @@ -187,7 +268,7 @@ if !built { print("❌ Neither cloud-localds nor genisoimage succeeded/available to build seed.iso"); exit(); } - let r2 = run_silent(`bash -lc "sudo genisoimage -output ${seed_iso} -volid cidata -joliet -rock ${user_data} ${meta_data}"`); + let r2 = run_silent(`sudo genisoimage -output ${seed_iso} -volid cidata -joliet -rock ${user_data} ${meta_data} ${net_config}`); if !r2.success { print("❌ genisoimage failed to create seed.iso"); exit(); @@ -228,9 +309,18 @@ if firmware_path != "" { // ----------- Create and start VM ----------- print("\n--- Creating and starting VM ---"); let rid = run_silent("date +%s%N"); -let suffix = if rid.success && rid.stdout != "" { rid.stdout.trim() } else { "100000" }; +let suffix = if rid.success && rid.stdout.trim() != "" { rid.stdout.trim() } else { "100000" }; let vm_id = `noble_vm_${suffix}`; +// Use a unique TAP per run to avoid "Resource busy" conflicts. +// Keep name <= 15 chars (Linux IFNAMSIZ), e.g. "tap-abcdef". +let tn = run_silent("od -An -N3 -tx1 /dev/urandom | tr -d '[:space:]'"); +if tn.success && tn.stdout.trim() != "" { + tap = `tap-${tn.stdout.trim()}`; +} else { + tap = "tap-abcd01"; +} + let spec = #{ "id": vm_id, "disk_path": disk_path, @@ -260,6 +350,23 @@ try { try { cloudhv_vm_start(vm_id); print("✓ VM start invoked"); + + // After CH creates/opens the TAP, attach it to the bridge to allow DHCP broadcast to reach dnsmasq on br0. + // Avoid racing with CH tap configuration: wait briefly, then attempt attach. + let post_net = ` +# Give CH time to finish configuring tap to avoid EBUSY +sleep 1 +for i in $(seq 1 30); do + if ip link show ${tap} >/dev/null 2>&1; then + # Enslave to bridge and ensure up; ignore errors (idempotent) + sudo ip link set ${tap} master ${bridge} 2>/dev/null || true + sudo ip link set ${tap} up 2>/dev/null || true + break + fi + sleep 1 +done +`; + run_silent(post_net); } catch (err) { print(`❌ VM start failed: ${err}`); exit(); @@ -268,9 +375,68 @@ try { // ----------- Wait for DHCP lease and print access info ----------- print("\n--- Waiting for DHCP lease from dnsmasq ---"); let vm_ip = ""; + +// First try deterministic fixed IP via ping (dnsmasq pins MAC->IP) for i in 0..60 { + // Use a plain command (no shell operators). Success indicates reachability. + let pr = run_silent(`ping -c1 -W1 -I ${bridge} ${vm_static_ip}`); + if pr.success { + vm_ip = vm_static_ip; + break; + } sleep(1); - let lr = run_silent(`bash -lc "if [ -f ${dnsmasq_lease} ]; then awk '\\$2 ~ /${mac}/ {print \\$3}' ${dnsmasq_lease} | tail -n1; fi"`); +} +for i in 0..180 { + sleep(1); + // Discover and validate IPv4; prefer exact MAC match across common dnsmasq lease locations + let lr = run_silent(` +valid_ipv4() { echo "$1" | grep -Eo '^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$' || true; } + +# Candidate lease files (add more if your distro uses a different path) +LEASE_FILES="${dnsmasq_lease} /var/lib/misc/dnsmasq.leases /var/lib/dnsmasq/dnsmasq.leases" +# Include any runtime leases under /run/dnsmasq if present +if ls /run/dnsmasq/*.leases >/dev/null 2>&1; then + LEASE_FILES="$LEASE_FILES $(ls /run/dnsmasq/*.leases 2>/dev/null)" +fi + +# 1) Try to find by exact MAC across all known lease files +for f in $LEASE_FILES; do + [ -f "$f" ] || continue + ip="$(awk -v m="${mac}" '$2==m{ip=$3} END{if(ip!="") print ip}' "$f")" + if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi +done + +# 2) Fallback: last IP in our br0 subnet across all lease files +for f in $LEASE_FILES; do + [ -f "$f" ] || continue + ip="$(awk '$3 ~ /^192\\.168\\.127\\./ {ip=$3} END{if(ip!="") print ip}' "$f")" + if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi +done + +# 3) Fallback: SAL default subnet (172.30.0.0/24) across all lease files +for f in $LEASE_FILES; do + [ -f "$f" ] || continue + ip="$(awk '$3 ~ /^172\\.30\\.0\\./ {ip=$3} END{if(ip!="") print ip}' "$f")" + if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi +done + +# 4) ARP gleaning on likely bridges (br0 first, then br-hero) for the known MAC +for dev in ${bridge} br-hero; do + if ip -o link show "$dev" >/dev/null 2>&1; then + ip="$(ip neigh show dev "$dev" | awk '$0 ~ /lladdr ${mac}/ {print $1}' | tail -n1)" + if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi + fi +done + +# 5) As a last resort, ARP any 192.168.127.x seen on br0 +if ip -o link show ${bridge} >/dev/null 2>&1; then + ip="$(ip neigh show dev ${bridge} | awk '$1 ~ /^192\\.168\\.127\\./ {print $1}' | tail -n1)" + if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi +fi + +# No valid IP yet +true +`); if lr.success { let ip = lr.stdout.trim(); if ip != "" { @@ -279,11 +445,75 @@ for i in 0..60 { } } } +// Fallback: parse cloud-hypervisor console log for an IPv4 on our expected subnets +let info2 = cloudhv_vm_info(vm_id); +let log_path = info2.runtime.log_file; if vm_ip == "" { - print("⚠️ Could not discover VM IP from leases yet. You can check leases and retry:"); - print(` cat ${dnsmasq_lease}`); + let cp = run_silent(` +if [ -f ${log_path} ]; then + grep -Eo '([0-9]+\\.){3}[0-9]+' ${log_path} | grep -E '^(192\\.168\\.127|172\\.30\\.0)\\.' | tail -n1 +fi +`); + if cp.success { + let ip2 = cp.stdout.trim(); + if ip2 != "" { + vm_ip = ip2; + } + } +} +if vm_ip == "" { + // Actively populate ARP neighbor tables by sweeping likely subnets + run_silent(` +for ip in $(seq 100 200); do ping -c1 -W1 -I ${bridge} 192.168.127.$ip >/dev/null 2>&1 || true; done +if ip -o link show br-hero >/dev/null 2>&1; then + for ip in $(seq 50 250); do ping -c1 -W1 -I br-hero 172.30.0.$ip >/dev/null 2>&1 || true; done +fi +`); + // Re-check after ARP sweep using the same validated discovery logic + let lr2 = run_silent(` +get_ip_from_leases() { + f="$1"; prefix="$2"; + if [ -f "$f" ]; then + awk -v pfx="$prefix" '$3 ~ ("^" pfx) {ip=$3} END{if(ip!="") print ip}' "$f" + fi +} +valid_ipv4() { + echo "$1" | grep -Eo '^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$' || true +} +cand="$(get_ip_from_leases ${dnsmasq_lease} "192.168.127.")" +if [ -n "$cand" ] && [ -n "$(valid_ipv4 "$cand")" ]; then echo "$cand"; exit 0; fi +cand="$(get_ip_from_leases /var/lib/misc/dnsmasq.leases "192.168.127.")" +if [ -n "$cand" ] && [ -n "$(valid_ipv4 "$cand")" ]; then echo "$cand"; exit 0; fi +cand="$(get_ip_from_leases /var/lib/misc/dnsmasq.leases "172.30.0.")" +if [ -n "$cand" ] && [ -n "$(valid_ipv4 "$cand")" ]; then echo "$cand"; exit 0; fi +cand="$(ip neigh show dev ${bridge} | awk '$0 ~ /lladdr ${mac}/ {print $1}' | tail -n1)" +if [ -n "$cand" ] && [ -n "$(valid_ipv4 "$cand")" ]; then echo "$cand"; exit 0; fi +true +`); + if lr2.success { + let ip2 = lr2.stdout.trim(); + if ip2 != "" { + vm_ip = ip2; + } + } +} + +/* Final sanity: ensure vm_ip is a valid IPv4 dotted-quad before printing */ +let _chk = run_silent(`echo "${vm_ip}" | grep -Eo '^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$' || true`); +if !(_chk.success && _chk.stdout.trim() != "") { vm_ip = ""; } + +if vm_ip == "" { + print("❌ Could not discover VM IP after 180 seconds."); + print("Diagnostics you can run now:"); + print(` tail -n +1 ${dnsmasq_lease}`); + print(" cat /var/lib/misc/dnsmasq.leases | tail -n 5"); + print(` ip neigh show dev ${bridge} | grep '${mac}' || true`); + print("Exiting without SSH command because the IP could not be determined."); + exit(); } else { print(`✓ Lease acquired: ${vm_ip}`); + print("\nSSH command (key-only; default user 'ubuntu'):"); + print(`ssh -o StrictHostKeyChecking=no ubuntu@${vm_ip}`); } print("\n--- VM access details ---"); @@ -293,13 +523,7 @@ print(`API socket: ${info.spec.api_socket}`); print(`Console log: ${info.runtime.log_file}`); print(`Bridge: ${bridge} at ${br_ip}, TAP: ${tap}, MAC: ${mac}`); print(`Seed: ${seed_iso}`); -if vm_ip != "" { - print("\nSSH command (key-only; default user 'ubuntu'):"); - print(`ssh -o StrictHostKeyChecking=no ubuntu@${vm_ip}`); -} else { - print("\nSSH command (replace after you see a lease):"); - print(`ssh -o StrictHostKeyChecking=no ubuntu@`); -} +/* SSH command already printed above when lease was acquired */ print("\nCleanup hints (manual):"); print(`- Stop dnsmasq: sudo kill \$(cat ${dnsmasq_pid})`);