diff --git a/cloud-hypervisor/Cargo.toml b/cloud-hypervisor/Cargo.toml index be61df65f6..fc53690086 100644 --- a/cloud-hypervisor/Cargo.toml +++ b/cloud-hypervisor/Cargo.toml @@ -47,7 +47,7 @@ wait-timeout = { workspace = true } # feature list below [features] dbus_api = ["vmm/dbus_api", "zbus"] -default = ["io_uring", "kvm"] +default = ["io_uring", "kvm", "fw_cfg"] dhat-heap = ["dhat", "vmm/dhat-heap"] # For heap profiling fw_cfg = ["vmm/fw_cfg"] guest_debug = ["vmm/guest_debug"] diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs index c5200e5f50..95a447bf78 100644 --- a/devices/src/legacy/fw_cfg.rs +++ b/devices/src/legacy/fw_cfg.rs @@ -79,12 +79,27 @@ pub const PORT_FW_CFG_WIDTH: u64 = 0x10; const FW_CFG_SIGNATURE: u16 = 0x00; const FW_CFG_ID: u16 = 0x01; +const FW_CFG_UUID: u16 = 0x02; +const FW_CFG_RAM_SIZE: u16 = 0x03; +const FW_CFG_NOGRAPHIC: u16 = 0x04; +const FW_CFG_NB_CPUS: u16 = 0x05; +const FW_CFG_MACHINE_ID: u16 = 0x06; +const FW_CFG_KERNEL_ADDR: u16 = 0x07; const FW_CFG_KERNEL_SIZE: u16 = 0x08; +const FW_CFG_KERNEL_CMDLINE: u16 = 0x09; +const FW_CFG_INITRD_ADDR: u16 = 0x0a; const FW_CFG_INITRD_SIZE: u16 = 0x0b; +const FW_CFG_BOOT_DEVICE: u16 = 0x0c; +const FW_CFG_NUMA: u16 = 0x0d; +const FW_CFG_BOOT_MENU: u16 = 0x0e; +const FW_CFG_MAX_CPUS: u16 = 0x0f; +const FW_CFG_KERNEL_ENTRY: u16 = 0x10; const FW_CFG_KERNEL_DATA: u16 = 0x11; const FW_CFG_INITRD_DATA: u16 = 0x12; +const FW_CFG_CMDLINE_ADDR: u16 = 0x13; const FW_CFG_CMDLINE_SIZE: u16 = 0x14; const FW_CFG_CMDLINE_DATA: u16 = 0x15; +const FW_CFG_SETUP_ADDR: u16 = 0x16; const FW_CFG_SETUP_SIZE: u16 = 0x17; const FW_CFG_SETUP_DATA: u16 = 0x18; const FW_CFG_FILE_DIR: u16 = 0x19; @@ -116,7 +131,9 @@ pub enum FwCfgContent { Bytes(Vec), Slice(&'static [u8]), File(u64, File), + U64(u64), U32(u32), + U16(u16), } struct FwCfgContentAccess<'a> { @@ -139,10 +156,18 @@ impl Read for FwCfgContentAccess<'_> { Some(mut s) => s.read(buf), None => Err(ErrorKind::UnexpectedEof)?, }, + FwCfgContent::U64(n) => match n.to_le_bytes().get(self.offset as usize..) { + Some(mut s) => s.read(buf), + None => Err(ErrorKind::UnexpectedEof)?, + }, FwCfgContent::U32(n) => match n.to_le_bytes().get(self.offset as usize..) { Some(mut s) => s.read(buf), None => Err(ErrorKind::UnexpectedEof)?, }, + FwCfgContent::U16(n) => match n.to_le_bytes().get(self.offset as usize..) { + Some(mut s) => s.read(buf), + None => Err(ErrorKind::UnexpectedEof)?, + }, } } } @@ -159,7 +184,9 @@ impl FwCfgContent { FwCfgContent::Bytes(v) => v.len(), FwCfgContent::File(offset, f) => (f.metadata()?.len() - offset) as usize, FwCfgContent::Slice(s) => s.len(), + FwCfgContent::U64(n) => size_of_val(n), FwCfgContent::U32(n) => size_of_val(n), + FwCfgContent::U16(n) => size_of_val(n), }; u32::try_from(ret).map_err(|_| std::io::ErrorKind::InvalidInput.into()) } @@ -419,10 +446,34 @@ impl FwCfg { pub fn new(memory: GuestMemoryAtomic>) -> FwCfg { const DEFAULT_ITEM: FwCfgContent = FwCfgContent::Slice(&[]); let mut known_items = [DEFAULT_ITEM; FW_CFG_KNOWN_ITEMS]; + let file_buf = Vec::from(FwCfgFilesHeader { count_be: 0 }.as_mut_bytes()); + known_items[FW_CFG_SIGNATURE as usize] = FwCfgContent::Slice(&FW_CFG_DMA_SIGNATURE); known_items[FW_CFG_ID as usize] = FwCfgContent::Slice(&FW_CFG_FEATURE); - let file_buf = Vec::from(FwCfgFilesHeader { count_be: 0 }.as_mut_bytes()); - known_items[FW_CFG_FILE_DIR as usize] = FwCfgContent::Bytes(file_buf); + known_items[FW_CFG_UUID as usize] = FwCfgContent::Bytes(Default::default()); /* TODO? */ + known_items[FW_CFG_RAM_SIZE as usize] = FwCfgContent::U64(0); /* TODO? */ + known_items[FW_CFG_NOGRAPHIC as usize] = FwCfgContent::U16(0); /* TODO? */ + known_items[FW_CFG_NB_CPUS as usize] = FwCfgContent::U16(0); /* TODO? */ + known_items[FW_CFG_MACHINE_ID as usize] = FwCfgContent::U16(0); /* TODO? */ + known_items[FW_CFG_KERNEL_ADDR as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_KERNEL_SIZE as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_KERNEL_CMDLINE as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_INITRD_ADDR as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_INITRD_SIZE as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_BOOT_DEVICE as usize] = FwCfgContent::U16(0); /* TODO? */ + known_items[FW_CFG_NUMA as usize] = FwCfgContent::Bytes(Default::default()); /* TODO? */ + known_items[FW_CFG_BOOT_MENU as usize] = FwCfgContent::U16(0); /* TODO? */ + known_items[FW_CFG_MAX_CPUS as usize] = FwCfgContent::U16(255); /* TODO? */ + known_items[FW_CFG_KERNEL_ENTRY as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_KERNEL_DATA as usize] = FwCfgContent::Bytes(Default::default()); /* TODO? */ + known_items[FW_CFG_INITRD_DATA as usize] = FwCfgContent::Bytes(Default::default()); /* TODO? */ + known_items[FW_CFG_CMDLINE_ADDR as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_CMDLINE_SIZE as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_CMDLINE_DATA as usize] = FwCfgContent::Bytes(Default::default()); /* TODO? */ + known_items[FW_CFG_SETUP_ADDR as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_SETUP_SIZE as usize] = FwCfgContent::U32(0); /* TODO? */ + known_items[FW_CFG_SETUP_DATA as usize] = FwCfgContent::Bytes(Default::default()); /* TODO? */ + known_items[FW_CFG_FILE_DIR as usize] = FwCfgContent::Bytes(file_buf); /* TODO? */ FwCfg { selector: 0, @@ -737,10 +788,18 @@ impl FwCfg { FwCfgContent::File(o, f) => { f.read_exact_at(data, o + offset as u64).ok()?; } + FwCfgContent::U64(n) => { + let bytes = n.to_le_bytes(); + data.copy_from_slice(&bytes[start..end]); + } FwCfgContent::U32(n) => { let bytes = n.to_le_bytes(); data.copy_from_slice(&bytes[start..end]); } + FwCfgContent::U16(n) => { + let bytes = n.to_le_bytes(); + data.copy_from_slice(&bytes[start..end]); + } } Some(size as u8) } diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 29cf8de56f..01a93ce43c 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -1370,6 +1370,7 @@ impl PciDeviceCommonConfig { } impl DiskConfig { + #[cfg(not(feature = "fw_cfg"))] pub const SYNTAX: &'static str = "Disk parameters \ \"path=,readonly=on|off,direct=on|off,iommu=on|off,\ num_queues=,queue_size=,\ @@ -1382,6 +1383,20 @@ impl DiskConfig { serial=,backing_files=on|off,sparse=on|off,\ image_type=,lock_granularity=byte-range|full"; + #[cfg(feature = "fw_cfg")] + pub const SYNTAX: &'static str = "Disk parameters \ + \"path=,readonly=on|off,direct=on|off,iommu=on|off,\ + num_queues=,queue_size=,\ + vhost_user=on|off,socket=,\ + bw_size=,bw_one_time_burst=,bw_refill_time=,\ + ops_size=,ops_one_time_burst=,ops_refill_time=,\ + id=,pci_segment=,pci_device_id=,\ + rate_limit_group=,\ + queue_affinity=,\ + serial=,backing_files=on|off,sparse=on|off,\ + image_type=,lock_granularity=byte-range|full,\ + bootindex="; + pub fn parse(disk: &str) -> Result { let mut parser = OptionParser::new(); parser @@ -1409,6 +1424,9 @@ impl DiskConfig { .add("lock_granularity") .add_all(PciDeviceCommonConfig::OPTIONS_IOMMU); + #[cfg(feature = "fw_cfg")] + parser.add("bootindex"); + parser.parse(disk).map_err(Error::ParseDisk)?; let path = parser.get("path").map(PathBuf::from); @@ -1538,6 +1556,11 @@ impl DiskConfig { let pci_common = PciDeviceCommonConfig::parse(disk)?; + #[cfg(feature = "fw_cfg")] + let bootindex = parser + .convert::("bootindex") + .map_err(Error::ParseDisk)?; + Ok(DiskConfig { pci_common, path, @@ -1557,6 +1580,8 @@ impl DiskConfig { sparse, image_type, lock_granularity, + #[cfg(feature = "fw_cfg")] + bootindex, }) } @@ -4125,6 +4150,8 @@ mod unit_tests { sparse: true, image_type: ImageType::Unknown, lock_granularity: LockGranularityChoice::default(), + #[cfg(feature = "fw_cfg")] + bootindex: Default::default(), } } diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 60e07f0029..93c3787299 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -119,6 +119,8 @@ use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager}; use crate::memory_manager::{Error as MemoryManagerError, MEMORY_MANAGER_ACPI_SIZE, MemoryManager}; use crate::pci_segment::PciSegment; use crate::serial_manager::{Error as SerialManagerError, SerialManager}; +#[cfg(feature = "fw_cfg")] +use crate::vm_config::FwCfgConfig; #[cfg(feature = "ivshmem")] use crate::vm_config::IvshmemConfig; use crate::vm_config::{ @@ -1149,6 +1151,9 @@ pub struct DeviceManager { #[cfg(feature = "ivshmem")] // ivshmem device ivshmem_device: Option>>, + + #[cfg(feature = "fw_cfg")] + boot_order: HashMap, } /// Create per-PCI-segment MMIO allocators over the range `[start, end]`. @@ -1439,6 +1444,8 @@ impl DeviceManager { #[cfg(feature = "ivshmem")] ivshmem_device: None, _acpi_cpu_hotplug_controller: acpi_cpu_hotplug_controller, + #[cfg(feature = "fw_cfg")] + boot_order: HashMap::new(), }; let device_manager = Arc::new(Mutex::new(device_manager)); @@ -1565,6 +1572,18 @@ impl DeviceManager { self.ivshmem_device = self.add_ivshmem_device(ivshmem, snapshot)?; } + #[cfg(feature = "fw_cfg")] + { + // if we have a boot order but no fw_cfg device, we must create on + let mut vm_config = self.config.lock().unwrap(); + if !self.boot_order.is_empty() + && vm_config.payload.is_some() + && vm_config.payload.as_mut().unwrap().fw_cfg_config.is_none() + { + vm_config.payload.as_mut().unwrap().fw_cfg_config = Some(FwCfgConfig::default()); + } + } + Ok(()) } @@ -2895,6 +2914,11 @@ impl DeviceManager { .unwrap() .insert(id.clone(), device_node!(id, migratable_device)); + #[cfg(feature = "fw_cfg")] + if let Some(bootindex) = disk_cfg.bootindex { + self.boot_order.insert(bootindex, id.clone()); + } + Ok(MetaVirtioDevice { virtio_device, pci_common: disk_cfg.pci_common.clone(), @@ -4403,11 +4427,31 @@ impl DeviceManager { } // Update the device tree with correct resource information. + #[cfg(feature = "fw_cfg")] + let node_id = node.id.clone(); + node.resources = new_resources; node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc>); node.pci_bdf = Some(pci_device_bdf); node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); - self.device_tree.lock().unwrap().insert(id, node); + self.device_tree.lock().unwrap().insert(id.clone(), node); + + #[cfg(feature = "fw_cfg")] + { + // Find the virtio device in the boot item list + let boot_entry = self + .boot_order + .iter() + .find(|(_k, v)| *v == virtio_device_id); + + // Update the device ID in the boot item list + if let Some((k, _)) = boot_entry { + self.boot_order.insert( + *k, + self.device_tree.lock().unwrap().fw_cfg_path_by_id(&node_id), + ); + } + } Ok(pci_device_bdf) } @@ -5319,7 +5363,6 @@ impl DeviceManager { self.vfio_ops = None; } } - /// Helps the environment converge quickly after a live migration by /// prompting devices to advertise the VM from its new host. /// @@ -5362,6 +5405,19 @@ impl DeviceManager { MAX_DELAY, ); } + + /// Returns a list of devices to boot from in the order expected + #[cfg(feature = "fw_cfg")] + pub fn get_bootitems(&self) -> Vec { + let mut result: Vec = Vec::new(); + let mut keys: Vec = self.boot_order.clone().into_keys().collect(); + keys.sort(); + + for k in &keys { + result.push(self.boot_order.get(k).unwrap().clone()); + } + result + } } /// Starts a thread that periodically performs the post-migration announcements. diff --git a/vmm/src/device_tree.rs b/vmm/src/device_tree.rs index e84cb524de..51ff67dc7a 100644 --- a/vmm/src/device_tree.rs +++ b/vmm/src/device_tree.rs @@ -7,11 +7,27 @@ use std::sync::{Arc, Mutex}; use pci::PciBdf; use serde::{Deserialize, Serialize}; +#[cfg(feature = "fw_cfg")] +use thiserror::Error; use vm_device::Resource; use vm_migration::Migratable; use crate::device_manager::PciDeviceHandle; +#[cfg(feature = "fw_cfg")] +#[derive(Debug, Error)] +pub enum DeviceNodePathError { + /// Device is not attached to a bus. + #[error("Can't create a open firmware device path from a device not attached to a bus")] + NoBusDevice, + /// Device is not bootable. + #[error("Can't create a open firmware device path for a non-bootable device")] + DeviceNotBootable, + /// Device is not a PCI device. + #[error("Currently only PCI devices are supported!")] + NoPciDevice, +} + #[derive(Clone, Serialize, Deserialize)] pub struct DeviceNode { pub id: String, @@ -37,6 +53,36 @@ impl DeviceNode { pci_device_handle: None, } } + + /// Returns the open firmware device path + /// + /// The open firmware path is used by Qemu in it's `fw_cfg` device to pass boot order hints + /// to EDK2/OVMF. [0] + /// + /// [0] https://github.com/tianocore/edk2/blob/2816ff0ab0d6505bf580aa8eec02cc2b89f04230/OvmfPkg/Library/QemuBootOrderLib/QemuBootOrderLib.c#L1222 + #[cfg(feature = "fw_cfg")] + pub fn openfw_device_path(&self) -> Result { + if let Some(handle) = &self.pci_device_handle { + if let PciDeviceHandle::Virtio(virtio_handle) = handle { + let virtio_device = virtio_handle.lock().unwrap(); + let device_type = virtio_device.virtio_device().lock().unwrap().device_type(); + if let Some(bdf) = self.pci_bdf { + match device_type { + 0x2 /* VirtioBlk */ => { + Ok(format!("/pci@i0cf8/scsi@{:x}/disk@0,0", bdf.device())) + } + _ => Err(DeviceNodePathError::DeviceNotBootable), + } + } else { + unreachable!("PCI devices should have a bdf!") + } + } else { + Err(DeviceNodePathError::NoPciDevice) + } + } else { + Err(DeviceNodePathError::NoBusDevice) + } + } } #[macro_export] @@ -102,6 +148,11 @@ impl DeviceTree { None } } + + #[cfg(feature = "fw_cfg")] + pub fn fw_cfg_path_by_id(&self, id: &str) -> String { + self.get(id).unwrap().openfw_device_path().unwrap() + } } // Breadth first traversal iterator. diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index d62173e5fa..d551d966a3 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -38,7 +38,7 @@ use devices::AcpiNotificationFlags; #[cfg(target_arch = "aarch64")] use devices::interrupt_controller; #[cfg(feature = "fw_cfg")] -use devices::legacy::fw_cfg::FwCfgItem; +use devices::legacy::fw_cfg::{FwCfgContent, FwCfgItem}; use event_monitor::event; #[cfg(all(target_arch = "aarch64", feature = "guest_debug"))] use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs; @@ -1249,8 +1249,8 @@ impl Vm { initramfs_option = initramfs; } let mut fw_cfg_item_list_option: Option> = None; + let mut fw_cfg_item_list = vec![]; if let Some(fw_cfg_items) = &fw_cfg_config.items { - let mut fw_cfg_item_list = vec![]; for fw_cfg_item in fw_cfg_items.item_list.clone() { let content = match (fw_cfg_item.string, fw_cfg_item.file) { (Some(string_val), None) => { @@ -1269,6 +1269,28 @@ impl Vm { content, }); } + } + // Create bootorder entry in fw_cfg + { + let mut content: Vec = Vec::new(); + let bootitems = device_manager.lock().unwrap().get_bootitems(); + for (index, boot_item) in bootitems.iter().enumerate() { + let cstring = std::ffi::CString::new(boot_item.as_str()).unwrap(); + if index == (bootitems.len() - 1) { + content.extend_from_slice(cstring.as_bytes_with_nul()); + } else { + content.extend_from_slice(cstring.as_bytes()); + content.extend_from_slice(std::ffi::CString::new("\n").unwrap().as_bytes()); + } + } + let bootorder_item = FwCfgItem { + name: "bootorder".to_string(), + content: FwCfgContent::Bytes(content), + }; + fw_cfg_item_list.push(bootorder_item); + } + + if !fw_cfg_item_list.is_empty() { fw_cfg_item_list_option = Some(fw_cfg_item_list); } diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 12ade3e9e0..913d245448 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -383,6 +383,9 @@ pub struct DiskConfig { pub image_type: ImageType, #[serde(default)] pub lock_granularity: LockGranularityChoice, + #[cfg(feature = "fw_cfg")] + #[serde(default)] + pub bootindex: Option, } impl ApplyLandlock for DiskConfig { @@ -931,9 +934,9 @@ impl Default for FwCfgConfig { fn default() -> Self { FwCfgConfig { e820: true, - kernel: true, - cmdline: true, - initramfs: true, + kernel: false, + cmdline: false, + initramfs: false, acpi_tables: true, items: None, }