From aab2b6f12886320c753e6f4d5e723182a922266c Mon Sep 17 00:00:00 2001
From: Maxime Van Hees
Date: Thu, 21 Aug 2025 13:32:03 +0200
Subject: [PATCH] Fix Cloud Hypervisor VM lifecycle handling and update test script

- vm_start: create the API socket directory and remove any stale socket file
- vm_start: convert qcow2 disks to raw with qemu-img, since Cloud Hypervisor
  does not support compressed qcow2 blocks
- vm_stop: wait up to ~10s for the process to exit, and wait again after a
  forced SIGKILL
- vm_delete: attempt a force stop before refusing to delete a running VM
- test script: wait for the API socket and probe readiness with ch-remote-static

---
 packages/system/virt/src/cloudhv/mod.rs       | 73 +++++++++++++++++--
 .../virt/tests/rhai/05_cloudhv_basic.rhai     | 35 +++++++++
 2 files changed, 100 insertions(+), 8 deletions(-)

diff --git a/packages/system/virt/src/cloudhv/mod.rs b/packages/system/virt/src/cloudhv/mod.rs
index c57ec92..e35c78d 100644
--- a/packages/system/virt/src/cloudhv/mod.rs
+++ b/packages/system/virt/src/cloudhv/mod.rs
@@ -8,6 +8,7 @@ use std::time::Duration;
 
 use sal_os;
 use sal_process;
+use crate::qcow2;
 
 /// Error type for Cloud Hypervisor operations
 #[derive(Debug)]
@@ -216,6 +217,45 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
     };
     let log_file = vm_log_path(id).to_string_lossy().into_owned();
 
+    // Ensure API socket directory exists and remove any stale socket file
+    let api_path = Path::new(&api_socket);
+    if let Some(parent) = api_path.parent() {
+        fs::create_dir_all(parent).map_err(|e| CloudHvError::IoError(e.to_string()))?;
+    }
+    // Best-effort removal of stale socket
+    let _ = fs::remove_file(&api_path);
+
+    // Preflight disk: if source is qcow2, convert to raw to avoid CH "Compressed blocks not supported"
+    // This is best-effort: if qemu-img is unavailable or info fails, we skip conversion.
+    let mut disk_to_use = rec.spec.disk_path.clone();
+    if let Ok(info) = qcow2::info(&disk_to_use) {
+        if info.get("format").and_then(|v| v.as_str()) == Some("qcow2") {
+            let dest = vm_dir(id).join("disk.raw").to_string_lossy().into_owned();
+            let cmd = format!(
+                "qemu-img convert -O raw {} {}",
+                shell_escape(&disk_to_use),
+                shell_escape(&dest)
+            );
+            match sal_process::run(&cmd).silent(true).execute() {
+                Ok(res) if res.success => {
+                    disk_to_use = dest;
+                }
+                Ok(res) => {
+                    return Err(CloudHvError::CommandFailed(format!(
+                        "Failed converting qcow2 to raw: {}",
+                        res.stderr
+                    )));
+                }
+                Err(e) => {
+                    return Err(CloudHvError::CommandFailed(format!(
+                        "Failed converting qcow2 to raw: {}",
+                        e
+                    )));
+                }
+            }
+        }
+    }
+
     // Build command (minimal args for Phase 2)
     // We redirect all output to log_file via shell and keep process in background with nohup
 
@@ -249,7 +289,7 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
     }
 
     parts.push("--disk".into());
-    parts.push(format!("path={}", rec.spec.disk_path));
+    parts.push(format!("path={}", disk_to_use));
     parts.push("--cpus".into());
     parts.push(format!("boot={}", rec.spec.vcpus));
     parts.push("--memory".into());
@@ -342,20 +382,27 @@ pub fn vm_stop(id: &str, force: bool) -> Result<(), CloudHvError> {
         let _ = sal_process::run(&cmd).die(false).silent(true).execute();
     }
 
-    // Wait a bit for process to exit
+    // Wait for process to exit (up to ~10s)
     if let Some(pid) = rec.runtime.pid {
-        for _ in 0..20 {
+        for _ in 0..50 {
             if !proc_exists(pid) {
                 break;
             }
             thread::sleep(Duration::from_millis(200));
         }
-        // If still alive and force, kill -9
+        // If still alive and force, kill -9 and wait again (up to ~10s)
         if proc_exists(pid) && force {
+            // Send SIGKILL without extra shell layers; suppress errors/noise
             let _ = sal_process::run(&format!("kill -9 {}", pid))
                 .die(false)
                 .silent(true)
                 .execute();
+            for _ in 0..50 {
+                if !proc_exists(pid) {
+                    break;
+                }
+                thread::sleep(Duration::from_millis(200));
+            }
         }
     }
 
@@ -380,12 +427,22 @@ pub fn vm_delete(id: &str, delete_disks: bool) -> Result<(), CloudHvError> {
     let rec: VmRecord = serde_json::from_value(read_json(&p)?)
         .map_err(|e| CloudHvError::JsonError(e.to_string()))?;
 
-    // Refuse to delete if still running
+    // If the VM appears to be running, attempt a force stop first (best-effort)
     if let Some(pid) = rec.runtime.pid {
         if proc_exists(pid) {
-            return Err(CloudHvError::CommandFailed(
-                "VM appears to be running; stop it first".into(),
-            ));
+            let _ = vm_stop(id, true);
+            // Re-check original PID for liveness (up to ~5s)
+            for _ in 0..25 {
+                if !proc_exists(pid) {
+                    break;
+                }
+                thread::sleep(Duration::from_millis(200));
+            }
+            if proc_exists(pid) {
+                return Err(CloudHvError::CommandFailed(
+                    "VM appears to be running; stop it first".into(),
+                ));
+            }
         }
     }
 
diff --git a/packages/system/virt/tests/rhai/05_cloudhv_basic.rhai b/packages/system/virt/tests/rhai/05_cloudhv_basic.rhai
index cb2510f..5dad849 100644
--- a/packages/system/virt/tests/rhai/05_cloudhv_basic.rhai
+++ b/packages/system/virt/tests/rhai/05_cloudhv_basic.rhai
@@ -105,6 +105,41 @@ if !missing {
         print(`⚠️ VM start failed (this can happen if kernel/cmdline are incompatible): ${err}`);
     }
 
+    print("\nWaiting for VM to be ready...");
+
+    // Discover API socket and PID from SAL
+    let info1 = cloudhv_vm_info(vm_id);
+    let api_sock = info1.spec.api_socket;
+    let pid = info1.runtime.pid;
+
+    // 1) Wait for API socket to appear (up to ~50s)
+    let sock_ok = false;
+    for x in 0..50 {
+        if exist(api_sock) { sock_ok = true; break; }
+        sleep(1);
+    }
+    print(`api_sock_exists=${sock_ok} path=${api_sock}`);
+
+    // 2) Probe ch-remote info with retries (up to ~20s)
+    if sock_ok {
+        let info_ok = false;
+        for x in 0..20 {
+            let r = run_silent(`ch-remote-static --api-socket ${api_sock} info`);
+            if r.success {
+                info_ok = true;
+                break;
+            }
+            sleep(1);
+        }
+        if info_ok {
+            print("VM API is ready (ch-remote info OK)");
+        } else {
+            print("⚠️ VM API did not become ready in time (continuing)");
+        }
+    } else {
+        print("⚠️ API socket not found (continuing)");
+    }
+
     print("\n--- Test 5: Stop VM (graceful) ---");
     try {
         cloudhv_vm_stop(vm_id, false);