diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 060934d74..9cbc38aaf 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -28,12 +28,18 @@ use propolis::hw::ibmpc; use propolis::hw::pci; use propolis::hw::ps2::ctrl::PS2Ctrl; use propolis::hw::qemu::pvpanic::QemuPvpanic; -use propolis::hw::qemu::{debug::QemuDebugPort, fwcfg, ramfb}; +use propolis::hw::qemu::{ + debug::QemuDebugPort, + fwcfg::{self, Entry}, + ramfb, +}; use propolis::hw::uart::LpcUart; use propolis::hw::{nvme, virtio}; use propolis::intr_pins; use propolis::vmm::{self, Builder, Machine}; -use propolis_api_types::instance_spec::{self, v0::InstanceSpecV0}; +use propolis_api_types::instance_spec::{ + self, v0::BootDeclaration, v0::InstanceSpecV0, +}; use propolis_api_types::InstanceProperties; use slog::info; @@ -117,6 +123,7 @@ pub struct MachineInitializer<'a> { pub(crate) properties: &'a InstanceProperties, pub(crate) toml_config: &'a crate::server::VmTomlConfig, pub(crate) producer_registry: Option, + pub(crate) boot_order: Option>, pub(crate) state: MachineInitializerState, } @@ -994,6 +1001,121 @@ impl<'a> MachineInitializer<'a> { smb_tables.commit() } + fn generate_bootorder(&self) -> Result, Error> { + eprintln!( + "generating bootorder with order: {:?}", + self.boot_order.as_ref() + ); + let Some(boot_names) = self.boot_order.as_ref() else { + return Ok(None); + }; + + let mut order = fwcfg::formats::BootOrder::new(); + + let parse_bdf = + |pci_path: &propolis_api_types::instance_spec::PciPath| { + let bdf: Result = + pci_path.to_owned().try_into().map_err(|e| { + Error::new( + ErrorKind::InvalidInput, + format!( + "Couldn't get PCI BDF for {}: {}", + pci_path, e + ), + ) + }); + + bdf + }; + + for boot_entry in boot_names.iter() { + if boot_entry.first_boot_only { + continue; + } + // names may refer to a storage device or network device. + // + // realistically we won't be booting from network devices, but leave that as a question + // for plumbing on the next layer up. + + let storage_backend = |spec| { + let spec: &instance_spec::v0::StorageDeviceV0 = spec; + match spec { + instance_spec::v0::StorageDeviceV0::VirtioDisk(disk) => { + &disk.backend_name + } + instance_spec::v0::StorageDeviceV0::NvmeDisk(disk) => { + &disk.backend_name + } + } + }; + + let network_backend = |spec| { + let spec: &instance_spec::v0::NetworkDeviceV0 = spec; + let instance_spec::v0::NetworkDeviceV0::VirtioNic(vnic_spec) = + spec; + + &vnic_spec.backend_name + }; + + if let Some(device_spec) = + self.spec.devices.storage_devices.iter().find_map( + |(_name, spec)| { + if storage_backend(spec) == &boot_entry.name { + Some(spec) + } else { + None + } + }, + ) + { + match device_spec { + instance_spec::v0::StorageDeviceV0::VirtioDisk(disk) => { + let bdf = parse_bdf(&disk.pci_path)?; + // TODO: check that bus is 0. only support boot devices + // directly attached to the root bus for now. + order.add_disk(bdf.location); + } + instance_spec::v0::StorageDeviceV0::NvmeDisk(disk) => { + let bdf = parse_bdf(&disk.pci_path)?; + // TODO: check that bus is 0. only support boot devices + // directly attached to the root bus for now. + // + // TODO: separately, propolis-standalone passes an eui64 + // of 0, so do that here too. is that.. ok? + order.add_nvme(bdf.location, 0); + } + }; + } else if let Some(vnic_spec) = + self.spec.devices.network_devices.iter().find_map( + |(name, spec)| { + if network_backend(spec) == &boot_entry.name { + Some(spec) + } else { + None + } + }, + ) + { + let instance_spec::v0::NetworkDeviceV0::VirtioNic(vnic_spec) = + vnic_spec; + + let bdf = parse_bdf(&vnic_spec.pci_path)?; + + order.add_pci(bdf.location, "ethernet"); + } else { + eprintln!( + "could not find {:?} in {:?} or {:?}", + boot_entry, + self.spec.devices.storage_devices, + self.spec.devices.network_devices + ); + panic!("TODO: return an error; the device name doesn't exist?"); + } + } + + Ok(Some(order.finish())) + } + /// Initialize qemu `fw_cfg` device, and populate it with data including CPU /// count, SMBIOS tables, and attached RAM-FB device. /// @@ -1010,6 +1132,13 @@ impl<'a> MachineInitializer<'a> { ) .unwrap(); + // TODO(ixi): extract `generate_smbios` and expect `smbios::TableBytes` as an argument to + // initialize_fwcfg. make `generate_smbios` accept rom size as an explicit parameter. this + // avoids the implicit panic if these are called out of order (and makes it harder to call + // out of order). + // + // one step towards sharing smbios init code between propolis-standalone and + // propolis-server. let smbios::TableBytes { entry_point, structure_table } = self.generate_smbios(); fwcfg @@ -1025,6 +1154,10 @@ impl<'a> MachineInitializer<'a> { ) .unwrap(); + if let Some(boot_order) = self.generate_bootorder()? { + fwcfg.insert_named("bootorder", boot_order).unwrap(); + } + let ramfb = ramfb::RamFb::create( self.log.new(slog::o!("component" => "ramfb")), ); diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 75232e9f7..881917638 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -177,6 +177,7 @@ impl<'a> VmEnsureNotStarted<'a> { properties, toml_config: &options.toml_config, producer_registry: options.oximeter_registry.clone(), + boot_order: v0_spec.devices.boot_order.clone(), state: MachineInitializerState::default(), }; diff --git a/bin/propolis-standalone/src/config.rs b/bin/propolis-standalone/src/config.rs index 3aff6c0da..69aec140e 100644 --- a/bin/propolis-standalone/src/config.rs +++ b/bin/propolis-standalone/src/config.rs @@ -136,7 +136,9 @@ pub fn block_backend( log: &slog::Logger, ) -> (Arc, String) { let backend_name = dev.options.get("block_dev").unwrap().as_str().unwrap(); - let be = config.block_devs.get(backend_name).unwrap(); + let Some(be) = config.block_devs.get(backend_name) else { + panic!("no configured block device named \"{}\"", backend_name); + }; let opts = block::BackendOpts { block_size: be.block_opts.block_size, read_only: be.block_opts.read_only, diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 1f210b661..7b3cf8496 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -959,8 +959,13 @@ fn generate_smbios(params: SmbiosParams) -> anyhow::Result { Ok(smb_tables.commit()) } -fn generate_bootorder(config: &config::Config) -> anyhow::Result { - let names = config.main.boot_order.as_ref().unwrap(); +fn generate_bootorder( + config: &config::Config, +) -> anyhow::Result> { + eprintln!("bootorder declared as {:?}", config.main.boot_order.as_ref()); + let Some(names) = config.main.boot_order.as_ref() else { + return Ok(None); + }; let mut order = fwcfg::formats::BootOrder::new(); for name in names.iter() { @@ -994,7 +999,7 @@ fn generate_bootorder(config: &config::Config) -> anyhow::Result { } } } - Ok(order.finish()) + Ok(Some(order.finish())) } fn setup_instance( @@ -1306,14 +1311,10 @@ fn setup_instance( // It is "safe" to generate bootorder (if requested) now, given that PCI // device configuration has been validated by preceding logic - if config.main.boot_order.is_some() { - fwcfg - .insert_named( - "bootorder", - generate_bootorder(&config) - .context("Failed to generate boot order")?, - ) - .unwrap(); + if let Some(boot_config) = + generate_bootorder(&config).context("Failed to generate boot order")? + { + fwcfg.insert_named("bootorder", boot_config).unwrap(); } fwcfg.attach(pio, &machine.acc_mem); diff --git a/crates/propolis-api-types/src/instance_spec/v0/mod.rs b/crates/propolis-api-types/src/instance_spec/v0/mod.rs index 344f83fcb..46fff1a1e 100644 --- a/crates/propolis-api-types/src/instance_spec/v0/mod.rs +++ b/crates/propolis-api-types/src/instance_spec/v0/mod.rs @@ -109,6 +109,11 @@ pub struct DeviceSpecV0 { #[serde(skip_serializing_if = "Option::is_none")] pub qemu_pvpanic: Option, + // same as above + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub boot_order: Option>, + #[cfg(feature = "falcon")] pub softnpu_pci_port: Option, #[cfg(feature = "falcon")] @@ -177,6 +182,12 @@ impl DeviceSpecV0 { } } +#[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] +pub struct BootDeclaration { + pub name: SpecKey, + pub first_boot_only: bool, +} + #[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] #[serde(deny_unknown_fields, tag = "type", content = "component")] pub enum StorageBackendV0 { diff --git a/lib/propolis-client/src/instance_spec.rs b/lib/propolis-client/src/instance_spec.rs index e99303cd3..c6347a89b 100644 --- a/lib/propolis-client/src/instance_spec.rs +++ b/lib/propolis-client/src/instance_spec.rs @@ -184,6 +184,35 @@ impl SpecBuilderV0 { } } + /// Sets a boot order. Names here refer to devices included in this spec. + /// + /// Permissible to not this if the implicit boot order is desired, but the implicit boot order + /// may be unstable across device addition and removal. + /// + /// XXX: talk about what happens if names are included that do not name real devices..? + /// + /// XXX: this should certainly return `&mut Self` - all the builders here should. check if any + /// of these are chained..? + pub fn set_boot_order( + &mut self, + boot_order: Vec, + ) -> Result<&Self, SpecBuilderError> { + let boot_declarations = boot_order + .into_iter() + .map(|name| crate::types::BootDeclaration { + name, + first_boot_only: false, + }) + .collect(); + eprintln!("setting boot order to {:?}", boot_declarations); + self.spec.devices.boot_order = Some(boot_declarations); + + // TODO: would be nice to warn if any of the devices named here are not devices in the spec + // though. + + Ok(self) + } + /// Yields the completed spec, consuming the builder. pub fn finish(self) -> InstanceSpecV0 { self.spec diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index 9d4eee36e..bcee6ce33 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -662,6 +662,13 @@ "additionalProperties": { "$ref": "#/components/schemas/StorageDeviceV0" } + }, + "boot_order": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/BootDeclaration" + } } }, "required": [ @@ -1463,6 +1470,24 @@ "vcr_matches" ] }, + "BootDeclaration": { + "description": "A boot preference for some device.", + "type": "object", + "properties": { + "name": { + "description": "The name of the device to attempt booting from.", + "type": "string" + }, + "first_boot_only": { + "description": "Should we include this device in the boot order for reboots of a started instance?", + "type": "boolean" + } + }, + "required": [ + "name", + "first_boot_only" + ] + }, "SerialPort": { "description": "A serial port device.", "type": "object", diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 77673132d..90495928e 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -37,6 +37,7 @@ pub enum DiskBackend { #[derive(Clone, Debug)] struct DiskRequest<'a> { + name: &'a str, interface: DiskInterface, backend: DiskBackend, source: DiskSource<'a>, @@ -48,8 +49,8 @@ pub struct VmConfig<'dr> { cpus: u8, memory_mib: u64, bootrom_artifact: String, - boot_disk: DiskRequest<'dr>, - data_disks: Vec>, + boot_order: Option>, + disks: Vec>, devices: BTreeMap, } @@ -63,22 +64,25 @@ impl<'dr> VmConfig<'dr> { bootrom: &str, guest_artifact: &'dr str, ) -> Self { - let boot_disk = DiskRequest { - interface: DiskInterface::Nvme, - backend: DiskBackend::File, - source: DiskSource::Artifact(guest_artifact), - pci_device_num: 4, - }; - - Self { + let mut config = Self { vm_name: vm_name.to_owned(), cpus, memory_mib, bootrom_artifact: bootrom.to_owned(), - boot_disk, - data_disks: Vec::new(), + boot_order: None, + disks: Vec::new(), devices: BTreeMap::new(), - } + }; + + config.data_disk( + "boot-disk", + DiskSource::Artifact(guest_artifact), + DiskInterface::Nvme, + DiskBackend::File, + 4, + ); + + config } pub fn cpus(&mut self, cpus: u8) -> &mut Self { @@ -119,6 +123,11 @@ impl<'dr> VmConfig<'dr> { self } + pub fn boot_order(&mut self, disks: Vec<&'dr str>) -> &mut Self { + self.boot_order = Some(disks); + self + } + pub fn boot_disk( &mut self, artifact: &'dr str, @@ -126,7 +135,17 @@ impl<'dr> VmConfig<'dr> { backend: DiskBackend, pci_device_num: u8, ) -> &mut Self { - self.boot_disk = DiskRequest { + // If you're calling `boot_disk` directly, you're probably not specifying an explicit boot + // order. the _implicit_ boot order seems to be the order disks are created in, so preserve + // that implied behavior by having a boot disk inserted up front. + // + // XXX: i thought that the implicit order would be *pci device order*, not disk creation + // order? + // XXX: figure out what to do if there is both an explicit boot order and a call to + // `.boot_disk`. + + self.disks[0] = DiskRequest { + name: "boot-disk", interface, backend, source: DiskSource::Artifact(artifact), @@ -138,12 +157,14 @@ impl<'dr> VmConfig<'dr> { pub fn data_disk( &mut self, + name: &'dr str, source: DiskSource<'dr>, interface: DiskInterface, backend: DiskBackend, pci_device_num: u8, ) -> &mut Self { - self.data_disks.push(DiskRequest { + self.disks.push(DiskRequest { + name, interface, backend, source, @@ -172,7 +193,10 @@ impl<'dr> VmConfig<'dr> { }) .context("serializing Propolis server config")?; - let DiskSource::Artifact(boot_artifact) = self.boot_disk.source else { + let boot_disk = &self.disks[0]; + + // TODO: adapt this to check all listed boot devices + let DiskSource::Artifact(boot_artifact) = boot_disk.source else { unreachable!("boot disks always use artifacts as sources"); }; @@ -183,16 +207,12 @@ impl<'dr> VmConfig<'dr> { .context("getting guest OS kind for boot disk")?; let mut disk_handles = Vec::new(); - disk_handles.push( - make_disk("boot-disk".to_owned(), framework, &self.boot_disk) - .await - .context("creating boot disk")?, - ); - for (idx, data_disk) in self.data_disks.iter().enumerate() { + for disk in self.disks.iter() { disk_handles.push( - make_disk(format!("data-disk-{}", idx), framework, data_disk) + // format!("data-disk-{}", idx) + make_disk(disk.name.to_owned(), framework, disk) .await - .context("creating data disk")?, + .context("creating disk")?, ); } @@ -203,10 +223,7 @@ impl<'dr> VmConfig<'dr> { // elements for all of them. This assumes the disk handles were created // in the correct order: boot disk first, then in the data disks' // iteration order. - let all_disks = [&self.boot_disk] - .into_iter() - .chain(self.data_disks.iter()) - .zip(disk_handles.iter()); + let all_disks = self.disks.iter().zip(disk_handles.iter()); for (idx, (req, hdl)) in all_disks.enumerate() { let device_name = format!("disk-device{}", idx); let pci_path = PciPath::new(0, req.pci_device_num, 0).unwrap(); @@ -238,6 +255,14 @@ impl<'dr> VmConfig<'dr> { .add_serial_port(SerialPortNumber::Com1) .context("adding serial port to spec")?; + if let Some(boot_order) = self.boot_order.as_ref() { + spec_builder + .set_boot_order( + boot_order.iter().map(|x| x.to_string()).collect(), + ) + .context("adding boot order to spec")?; + } + let instance_spec = spec_builder.finish(); // Generate random identifiers for this instance's timeseries metadata. diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs new file mode 100644 index 000000000..e24019818 --- /dev/null +++ b/phd-tests/tests/src/boot_order.rs @@ -0,0 +1,45 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use phd_framework::{ + disk::DiskSource, + test_vm::{DiskBackend, DiskInterface}, +}; +use phd_testcase::*; +// use tracing::{info, warn}; + +#[phd_testcase] +async fn configurable_boot_order(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("configurable_boot_order"); + cfg.data_disk( + "alpine-3-20", + DiskSource::Artifact("alpine-3-20"), + DiskInterface::Virtio, + DiskBackend::File, + 16, + ); + + cfg.data_disk( + "alpine-3-16", + DiskSource::Artifact("alpine-3-16"), + DiskInterface::Virtio, + DiskBackend::File, + 24, + ); + + cfg.boot_order(vec!["alpine-3-20", "alpine-3-16"]); + + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let kernel = vm.run_shell_command("uname -r").await?; + if kernel.contains("6.6.41-0-virt") { + panic!("booted 6.6.41"); + } else if kernel.contains("5.15.41-0-virt") { + panic!("booted 5.15.41"); + } else { + eprintln!("dunno what we booted? {}", kernel); + } +} diff --git a/phd-tests/tests/src/disk.rs b/phd-tests/tests/src/disk.rs index 11a6e90c7..1db141c90 100644 --- a/phd-tests/tests/src/disk.rs +++ b/phd-tests/tests/src/disk.rs @@ -17,6 +17,7 @@ async fn in_memory_backend_smoke_test(ctx: &Framework) { let mut data = FatFilesystem::new(); data.add_file_from_str("hello_oxide.txt", HELLO_MSG)?; cfg.data_disk( + "data-disk-0", DiskSource::FatFilesystem(data), DiskInterface::Virtio, DiskBackend::InMemory { readonly: true }, diff --git a/phd-tests/tests/src/lib.rs b/phd-tests/tests/src/lib.rs index a4968487a..37c9efcb3 100644 --- a/phd-tests/tests/src/lib.rs +++ b/phd-tests/tests/src/lib.rs @@ -4,6 +4,7 @@ pub use phd_testcase; +mod boot_order; mod crucible; mod disk; mod framework;