From ad0a49b011f6edd7e2274f3e738ea6b998ec3482 Mon Sep 17 00:00:00 2001 From: Andrew Bays Date: Tue, 9 Jan 2024 10:40:24 +0000 Subject: [PATCH] [OSPK8-784] Allow user-provided cloud-init secrets for BMHs --- api/v1beta1/common_openstackbaremetalset.go | 6 +- .../openstackbaremetalset_controller.go | 669 +++++++++++++----- pkg/baremetalset/const.go | 3 - 3 files changed, 480 insertions(+), 198 deletions(-) diff --git a/api/v1beta1/common_openstackbaremetalset.go b/api/v1beta1/common_openstackbaremetalset.go index 0ca3a8cf..a92ff286 100644 --- a/api/v1beta1/common_openstackbaremetalset.go +++ b/api/v1beta1/common_openstackbaremetalset.go @@ -115,10 +115,10 @@ func VerifyBaremetalStatusHostRefs( } // VerifyBaremetalSetScaleUp - -func VerifyBaremetalSetScaleUp(log logr.Logger, instance *OpenStackBaremetalSet, allBmhs *metal3v1.BareMetalHostList, existingBmhs *metal3v1.BareMetalHostList) ([]string, error) { +func VerifyBaremetalSetScaleUp(log logr.Logger, instance *OpenStackBaremetalSet, allBmhs *metal3v1.BareMetalHostList, existingBmhs *metal3v1.BareMetalHostList) ([]metal3v1.BareMetalHost, error) { // How many new BaremetalHost allocations do we need (if any)? newBmhsNeededCount := instance.Spec.Count - len(existingBmhs.Items) - availableBaremetalHosts := []string{} + availableBaremetalHosts := []metal3v1.BareMetalHost{} if newBmhsNeededCount > 0 { // We have new replicas requested, so search for baremetalhosts that don't have consumerRef or Online set @@ -162,7 +162,7 @@ func VerifyBaremetalSetScaleUp(log logr.Logger, instance *OpenStackBaremetalSet, log.Info(fmt.Sprintf("Available BaremetalHost: %s", baremetalHost.ObjectMeta.Name)) - availableBaremetalHosts = append(availableBaremetalHosts, baremetalHost.ObjectMeta.Name) + availableBaremetalHosts = append(availableBaremetalHosts, baremetalHost) } // If we can't satisfy the new requested replica count, explicitly state so diff --git a/controllers/openstackbaremetalset_controller.go b/controllers/openstackbaremetalset_controller.go index 3bf9f142..914c4cca 100644 --- a/controllers/openstackbaremetalset_controller.go +++ b/controllers/openstackbaremetalset_controller.go @@ -46,7 +46,7 @@ import ( "github.com/openstack-k8s-operators/osp-director-operator/api/shared" ospdirectorv1beta1 "github.com/openstack-k8s-operators/osp-director-operator/api/v1beta1" "github.com/openstack-k8s-operators/osp-director-operator/pkg/baremetalset" - common "github.com/openstack-k8s-operators/osp-director-operator/pkg/common" + "github.com/openstack-k8s-operators/osp-director-operator/pkg/common" openstackipset "github.com/openstack-k8s-operators/osp-director-operator/pkg/openstackipset" "github.com/openstack-k8s-operators/osp-director-operator/pkg/provisionserver" ) @@ -788,7 +788,9 @@ func (r *OpenStackBaremetalSetReconciler) ensureBaremetalHosts( newBmhsNeededCount := instance.Spec.Count - len(existingBaremetalHosts.Items) // Sort the list of available BaremetalHosts - sort.Strings(availableBaremetalHosts) + sort.Slice(availableBaremetalHosts[:], func(i, j int) bool { + return availableBaremetalHosts[i].Name < availableBaremetalHosts[j].Name + }) // For each available BaremetalHost that we need to allocate, we update the // reference to use our image and set the user data to use our cloud-init secret. @@ -800,7 +802,7 @@ func (r *OpenStackBaremetalSetReconciler) ensureBaremetalHosts( instance, cond, osNetCfg, - availableBaremetalHosts[i], + &availableBaremetalHosts[i], provisionServer.Status.LocalImageURL, sshSecret, passwordSecret, @@ -818,7 +820,7 @@ func (r *OpenStackBaremetalSetReconciler) ensureBaremetalHosts( instance, cond, osNetCfg, - bmh.GetName(), + &bmh, provisionServer.Status.LocalImageURL, sshSecret, passwordSecret, @@ -851,25 +853,21 @@ func (r *OpenStackBaremetalSetReconciler) getBmhHostRefStatus( return ospdirectorv1beta1.HostStatus{}, k8s_errors.NewNotFound(corev1.Resource("OpenStackBaremetalHostStatus"), "not found") } -// Provision a BaremetalHost via Metal3 (and create its bootstrapping secret) +// Provision a BaremetalHost via Metal3 (and potentially create its bootstrapping secrets) func (r *OpenStackBaremetalSetReconciler) baremetalHostProvision( ctx context.Context, instance *ospdirectorv1beta1.OpenStackBaremetalSet, cond *shared.Condition, osNetCfg *ospdirectorv1beta1.OpenStackNetConfig, - bmh string, + bmh *metal3v1.BareMetalHost, localImageURL string, sshSecret string, passwordSecret *corev1.Secret, ) error { - // Prepare cloudinit (create secret) - sts := []common.Template{} - secretLabels := common.GetLabels(instance, baremetalset.AppLabel, map[string]string{}) - // // get already registerd OSBms bmh status for bmh name // - bmhStatus, err := r.getBmhHostRefStatus(instance, cond, bmh) + bmhStatus, err := r.getBmhHostRefStatus(instance, cond, bmh.Name) // // if bmhStatus is not found, get free hostname from instance.Status.baremetalHosts (HostRef == ospdirectorv1beta1.HostRefInitState ("unassigned")) for the new bmh // @@ -877,7 +875,7 @@ func (r *OpenStackBaremetalSetReconciler) baremetalHostProvision( for _, bmhStatus = range instance.Status.BaremetalHosts { if bmhStatus.HostRef == shared.HostRefInitState { - bmhStatus.HostRef = bmh + bmhStatus.HostRef = bmh.Name instance.Status.BaremetalHosts[bmhStatus.Hostname] = bmhStatus // update status with host assignment @@ -893,7 +891,7 @@ func (r *OpenStackBaremetalSetReconciler) baremetalHostProvision( common.LogForObject( r, - fmt.Sprintf("Assigned %s to baremetalhost %s", bmhStatus.Hostname, bmh), + fmt.Sprintf("Assigned %s to baremetalhost %s", bmhStatus.Hostname, bmh.Name), instance, ) @@ -902,176 +900,56 @@ func (r *OpenStackBaremetalSetReconciler) baremetalHostProvision( } } - // User data cloud-init secret - templateParameters := make(map[string]interface{}) - templateParameters["AuthorizedKeys"] = sshSecret - templateParameters["Hostname"] = bmhStatus.Hostname - templateParameters["DomainName"] = osNetCfg.Spec.DomainName - - // - // use same NodeRootPassword paremater as tripleo have + // Handle cloud-init concerns // - if passwordSecret != nil && len(passwordSecret.Data["NodeRootPassword"]) > 0 { - templateParameters["NodeRootPassword"] = string(passwordSecret.Data["NodeRootPassword"]) - } - - userDataSecretName := fmt.Sprintf(baremetalset.CloudInitUserDataSecretName, instance.Name, bmh) - - userDataSt := common.Template{ - Name: userDataSecretName, - Namespace: "openshift-machine-api", - Type: common.TemplateTypeConfig, - InstanceType: instance.Kind, - AdditionalTemplate: map[string]string{"userData": "/baremetalset/cloudinit/userdata"}, - Labels: secretLabels, - ConfigOptions: templateParameters, - } - - sts = append(sts, userDataSt) - - labelSelector := map[string]string{ - shared.ControlPlaneNetworkLabelSelector: strconv.FormatBool(true), - } - - ctlplaneNets, err := ospdirectorv1beta1.GetOpenStackNetsMapWithLabel( - r.Client, - instance.Namespace, - labelSelector, + // Get the cloud-init secrets for this BMH (if any) and check their labels. + // If there are no current cloud-init secrets on the BMH or if there are and + // their labels indicate that the secrets were generated by us, then we process + // the cloud-init secrets as we normally would (in other words, we execute the + // auto-generate reconciliation logic to generate them ourselves). If our + // labels are missing, this means that the user created the secrets and + // manually set them on the BMH, so we need to leave them alone (aside from + // adding a label to flag them for collection during a potential execution of + // a "must-gather"). In either case, "r.cloudInitProvision" returns the + // cloud-init secret references (assuming there was no error) and we will use + // them further below. + + userDataSecretRef, networkDataSecretRef, err := r.cloudInitProvision( + ctx, + instance, + cond, + osNetCfg, + bmh, + sshSecret, + bmhStatus.Hostname, + passwordSecret, ) - if err != nil { - cond.Message = fmt.Sprintf("Error getting ctlplane OSNets with labelSelector %v", labelSelector) - cond.Reason = shared.CommonCondReasonOSNetError - cond.Type = shared.CommonCondTypeError - err = common.WrapErrorForObject(cond.Message, instance, err) - return err - } - var netNameLower string - var ctlPlaneNetwork ospdirectorv1beta1.OpenStackNet - -outer: - for netName, osNet := range ctlplaneNets { - for _, myNet := range instance.Spec.Networks { - if myNet == netName { - netNameLower = netName - ctlPlaneNetwork = osNet - break outer - } - } - } - - if netNameLower == "" { - cond.Message = "Ctlplane network not found" - cond.Reason = shared.CommonCondReasonOSNetError - cond.Type = shared.CommonCondTypeError - err = common.WrapErrorForObject(cond.Message, instance, err) - return err - } - - ipCidr := instance.Status.BaremetalHosts[bmhStatus.Hostname].IPAddresses[netNameLower] - ip, network, err := net.ParseCIDR(ipCidr) if err != nil { - cond.Message = fmt.Sprintf("Error parsing IP CIDR %v for network %v", ipCidr, netNameLower) - cond.Reason = shared.CommonCondReasonOSNetError - cond.Type = shared.CommonCondTypeError - err = common.WrapErrorForObject(cond.Message, instance, err) - return err - } - - netMask := network.Mask - - // Network data cloud-init secret - templateParameters = make(map[string]interface{}) - templateParameters["CtlplaneIp"] = ip.String() - templateParameters["CtlplaneInterface"] = instance.Spec.CtlplaneInterface - templateParameters["CtlplaneGateway"] = ctlPlaneNetwork.Spec.Gateway - templateParameters["CtlplaneNetmask"] = net.IP(netMask).String() - if len(instance.Spec.BootstrapDNS) > 0 { - templateParameters["CtlplaneDns"] = instance.Spec.BootstrapDNS - } else { - templateParameters["CtlplaneDns"] = osNetCfg.Spec.DNSServers - } - - if len(instance.Spec.DNSSearchDomains) > 0 { - templateParameters["CtlplaneDnsSearch"] = instance.Spec.DNSSearchDomains - } else { - templateParameters["CtlplaneDnsSearch"] = osNetCfg.Spec.DNSSearchDomains - } - - routes := []map[string]string{} - for _, route := range ctlPlaneNetwork.Spec.Routes { - _, routeNetwork, err := net.ParseCIDR(route.Destination) - if err != nil { - cond.Message = fmt.Sprintf("Error parsing route CIDR %v for network %v", route.Destination, netNameLower) - cond.Reason = shared.CommonCondReasonOSNetError - cond.Type = shared.CommonCondTypeError - err = common.WrapErrorForObject(cond.Message, instance, err) - return err - } - routes = append(routes, map[string]string{"network": routeNetwork.IP.String(), "netmask": net.IP(routeNetwork.Mask).String(), "gateway": route.Nexthop}) - } - - templateParameters["CtlplaneRoutes"] = routes - - networkDataSecretName := fmt.Sprintf(baremetalset.CloudInitNetworkDataSecretName, instance.Name, bmh) - - // Flag the network data secret as safe to collect with must-gather - secretLabelsWithMustGather := common.GetLabels(instance, baremetalset.AppLabel, map[string]string{ - common.MustGatherSecret: "yes", - }) - - networkDataSt := common.Template{ - Name: networkDataSecretName, - Namespace: "openshift-machine-api", - Type: common.TemplateTypeConfig, - InstanceType: instance.Kind, - AdditionalTemplate: map[string]string{"networkData": "/baremetalset/cloudinit/networkdata"}, - Labels: secretLabelsWithMustGather, - ConfigOptions: templateParameters, - } - - sts = append(sts, networkDataSt) - - err = common.EnsureSecrets(ctx, r, instance, sts, &map[string]common.EnvSetter{}) - if err != nil { - cond.Message = "Error creating metal3 cloud-init secrets" - cond.Reason = shared.BaremetalHostCondReasonCloudInitSecretError - cond.Type = shared.CommonCondTypeError - err = common.WrapErrorForObject(cond.Message, instance, err) - return err } // // Provision the BaremetalHost // - foundBaremetalHost := &metal3v1.BareMetalHost{} - err = r.Get(ctx, types.NamespacedName{Name: bmh, Namespace: "openshift-machine-api"}, foundBaremetalHost) - if err != nil { - cond.Message = fmt.Sprintf("Failed to get %s %s", foundBaremetalHost.Kind, bmh) - cond.Reason = shared.BaremetalHostCondReasonGetError - cond.Type = shared.CommonCondTypeError - return err - } - - op, err := controllerutil.CreateOrPatch(ctx, r.Client, foundBaremetalHost, func() error { + op, err := controllerutil.CreateOrPatch(ctx, r.Client, bmh, func() error { // // Set our ownership labels so we can watch this resource // Set ownership labels that can be found by the respective controller kind - labelSelector = common.GetLabels(instance, baremetalset.AppLabel, map[string]string{ + labelSelector := common.GetLabels(instance, baremetalset.AppLabel, map[string]string{ common.OSPHostnameLabelSelector: bmhStatus.Hostname, }) - foundBaremetalHost.Labels = shared.MergeStringMaps( - foundBaremetalHost.GetLabels(), + bmh.Labels = shared.MergeStringMaps( + bmh.GetLabels(), labelSelector, ) // // Ensure the image url is up to date unless already provisioned // - if foundBaremetalHost.Status.Provisioning.State != metal3v1.StateProvisioned { - foundBaremetalHost.Spec.Image = &metal3v1.Image{ + if bmh.Status.Provisioning.State != metal3v1.StateProvisioned { + bmh.Spec.Image = &metal3v1.Image{ URL: localImageURL, Checksum: fmt.Sprintf("%s.md5sum", localImageURL), } @@ -1080,27 +958,21 @@ outer: // // Update the BMH spec once when ConsumerRef is nil to only perform one time provision. // - if foundBaremetalHost.Spec.ConsumerRef == nil { - foundBaremetalHost.Spec.Online = true - foundBaremetalHost.Spec.ConsumerRef = &corev1.ObjectReference{Name: instance.Name, Kind: instance.Kind, Namespace: instance.Namespace} - foundBaremetalHost.Spec.UserData = &corev1.SecretReference{ - Name: userDataSecretName, - Namespace: "openshift-machine-api", - } - foundBaremetalHost.Spec.NetworkData = &corev1.SecretReference{ - Name: networkDataSecretName, - Namespace: "openshift-machine-api", - } + if bmh.Spec.ConsumerRef == nil { + bmh.Spec.Online = true + bmh.Spec.ConsumerRef = &corev1.ObjectReference{Name: instance.Name, Kind: instance.Kind, Namespace: instance.Namespace} + bmh.Spec.UserData = userDataSecretRef + bmh.Spec.NetworkData = networkDataSecretRef } return nil }) if err != nil { - cond.Message = fmt.Sprintf("Error update %s BMH %s", foundBaremetalHost.Kind, foundBaremetalHost.Name) + cond.Message = fmt.Sprintf("Error update %s BMH %s", bmh.Kind, bmh.Name) cond.Reason = shared.BaremetalHostCondReasonUpdateError cond.Type = shared.CommonCondTypeError err = common.WrapErrorForObject(cond.Message, instance, err) - common.LogForObject(r, fmt.Sprintf("%s: %+v", cond.Message, foundBaremetalHost), foundBaremetalHost) + common.LogForObject(r, fmt.Sprintf("%s: %+v", cond.Message, bmh), bmh) return err } @@ -1108,7 +980,7 @@ outer: if op != controllerutil.OperationResultNone { common.LogForObject( r, - fmt.Sprintf("BaremetalHost %s successfully reconciled - operation: %s", foundBaremetalHost.Name, string(op)), + fmt.Sprintf("BaremetalHost %s successfully reconciled - operation: %s", bmh.Name, string(op)), instance, ) } @@ -1116,9 +988,9 @@ outer: // // Update status with BMH provisioning details // - bmhStatus.UserDataSecretName = userDataSecretName - bmhStatus.NetworkDataSecretName = networkDataSecretName - bmhStatus.ProvisioningState = shared.ProvisioningState(foundBaremetalHost.Status.Provisioning.State) + bmhStatus.UserDataSecretName = userDataSecretRef.Name + bmhStatus.NetworkDataSecretName = networkDataSecretRef.Name + bmhStatus.ProvisioningState = shared.ProvisioningState(bmh.Status.Provisioning.State) actualBMHStatus := instance.Status.BaremetalHosts[bmhStatus.Hostname] if !reflect.DeepEqual(actualBMHStatus, bmhStatus) { @@ -1136,6 +1008,291 @@ outer: } +// Creates/updates user data and network data cloud init secrets for a BMH if +// those secrets do not already exist or do exist but have our labels on them. +// If the secrets exist and do not have our labels, then it means the user +// created them manually and that we should therefore leave them alone apart +// from labeling them for potential "must-gather" collection. +func (r *OpenStackBaremetalSetReconciler) cloudInitProvision(ctx context.Context, + instance *ospdirectorv1beta1.OpenStackBaremetalSet, + cond *shared.Condition, + osNetCfg *ospdirectorv1beta1.OpenStackNetConfig, + bmh *metal3v1.BareMetalHost, + sshSecret string, + hostName string, + passwordSecret *corev1.Secret) (*corev1.SecretReference, *corev1.SecretReference, error) { + var userDataSecretRef *corev1.SecretReference + var networkDataSecretRef *corev1.SecretReference + // Prepare cloudinit (create secret) + sts := []common.Template{} + secretLabels := common.GetLabels(instance, baremetalset.AppLabel, map[string]string{ + common.MustGatherSecret: "yes", + }) + + // Flags to indicate whether we should generate cloud-init secrets (i.e. the user + // did not manually provide them on the BMH already) + generateUserDataSecret := true + generateNetworkDataSecret := true + + // In-line function used to add our "must-gather" label to user-provided cloud-init secrets + updateUserProvidedCloudInitSecret := func(secret *corev1.Secret) error { + labelSelector := map[string]string{ + common.MustGatherSecret: "yes", + } + + op, err := controllerutil.CreateOrPatch(ctx, r.GetClient(), secret, func() error { + secret.Labels = shared.MergeStringMaps( + secret.GetLabels(), + labelSelector, + ) + return nil + }) + if err != nil { + cond.Message = fmt.Sprintf("Error setting must-gather label on cloud-init secret %s in namespace %s for BMH %s during provisioning", + bmh.Spec.UserData.Name, bmh.Spec.UserData.Namespace, bmh.Name) + cond.Reason = shared.CommonCondReasonSecretError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return err + } + if op != controllerutil.OperationResultNone { + r.Log.Info(fmt.Sprintf("Cloud-init secret %s %s with must-gather label in namespace %s for BMH %s during provisioning", + bmh.Spec.UserData.Name, op, bmh.Spec.UserData.Namespace, bmh.Name)) + } + + return nil + } + + // Check user data cloud-init secret. If it was not created by the user, then + // we need to make sure we create it ourselves. + if bmh.Spec.UserData != nil { + // Get the secret to check its labels + userDataSecret, _, err := common.GetSecret(ctx, r, bmh.Spec.UserData.Name, bmh.Spec.UserData.Namespace) + + if err != nil { + cond.Message = fmt.Sprintf("Error getting user data secret %s in namespace %s for BMH %s during provisioning", + bmh.Spec.UserData.Name, bmh.Spec.UserData.Namespace, bmh.Name) + cond.Reason = shared.CommonCondReasonSecretError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return nil, nil, err + } + + if userDataSecret.Labels[common.OwnerControllerNameLabelSelector] != baremetalset.AppLabel { + // Our labels are not present on the secret (which we would otherwise + // expect to find if we had automatically generated it in a previous + // reconciliation), so this cloud-init secret was added by the user + // and should only be modified to include our "must-gather" label + + // Don't automatically generate a user data cloud-init secret below, + // since we already have one provided by the user + generateUserDataSecret = false + userDataSecretRef = bmh.Spec.UserData + + err = updateUserProvidedCloudInitSecret(userDataSecret) + + if err != nil { + return nil, nil, err + } + } + } + + if generateUserDataSecret { + // Automatically generate user data cloud-init secret (i.e. user did not + // already manually create it for the BMH) + templateParameters := make(map[string]interface{}) + templateParameters["AuthorizedKeys"] = sshSecret + templateParameters["Hostname"] = hostName + templateParameters["DomainName"] = osNetCfg.Spec.DomainName + + // + // use same NodeRootPassword paremater as tripleo have + // + if passwordSecret != nil && len(passwordSecret.Data["NodeRootPassword"]) > 0 { + templateParameters["NodeRootPassword"] = string(passwordSecret.Data["NodeRootPassword"]) + } + + userDataSecretName := fmt.Sprintf(baremetalset.CloudInitUserDataSecretName, instance.Name, bmh.Name) + + userDataSt := common.Template{ + Name: userDataSecretName, + Namespace: "openshift-machine-api", + Type: common.TemplateTypeConfig, + InstanceType: instance.Kind, + AdditionalTemplate: map[string]string{"userData": "/baremetalset/cloudinit/userdata"}, + Labels: secretLabels, + ConfigOptions: templateParameters, + } + + userDataSecretRef = &corev1.SecretReference{ + Name: userDataSecretName, + Namespace: "openshift-machine-api", + } + + sts = append(sts, userDataSt) + } + + // Check network data cloud-init secret. If it was not created by the user, then + // we need to make sure we create it ourselves. + if bmh.Spec.NetworkData != nil { + // Get the secret to check its labels + networkDataSecret, _, err := common.GetSecret(ctx, r, bmh.Spec.NetworkData.Name, bmh.Spec.NetworkData.Namespace) + + if err != nil { + cond.Message = fmt.Sprintf("Error getting network data secret %s in namespace %s for BMH %s during provisioning", + bmh.Spec.NetworkData.Name, bmh.Spec.NetworkData.Namespace, bmh.Name) + cond.Reason = shared.CommonCondReasonSecretError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return nil, nil, err + } + + if networkDataSecret.Labels[common.OwnerControllerNameLabelSelector] != baremetalset.AppLabel { + // Our labels are not present on the secret (which we would otherwise + // expect to find if we had automatically generated it in a previous + // reconciliation), so this cloud-init secret was added by the user + // and should only be modified to include our "must-gather" label + + // Don't automatically generate a network data cloud-init secret below, + // since we already have one provided by the user + generateNetworkDataSecret = false + networkDataSecretRef = bmh.Spec.NetworkData + + err = updateUserProvidedCloudInitSecret(networkDataSecret) + + if err != nil { + return nil, nil, err + } + } + } + + if generateNetworkDataSecret { + // Automatically generate network data cloud-init secret (i.e. user did not + // already manually create it for the BMH) + labelSelector := map[string]string{ + shared.ControlPlaneNetworkLabelSelector: strconv.FormatBool(true), + } + + ctlplaneNets, err := ospdirectorv1beta1.GetOpenStackNetsMapWithLabel( + r.Client, + instance.Namespace, + labelSelector, + ) + if err != nil { + cond.Message = fmt.Sprintf("Error getting ctlplane OSNets with labelSelector %v", labelSelector) + cond.Reason = shared.CommonCondReasonOSNetError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return nil, nil, err + } + + var netNameLower string + var ctlPlaneNetwork ospdirectorv1beta1.OpenStackNet + + outer: + for netName, osNet := range ctlplaneNets { + for _, myNet := range instance.Spec.Networks { + if myNet == netName { + netNameLower = netName + ctlPlaneNetwork = osNet + break outer + } + } + } + + if netNameLower == "" { + cond.Message = "Ctlplane network not found" + cond.Reason = shared.CommonCondReasonOSNetError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return nil, nil, err + } + + ipCidr := instance.Status.BaremetalHosts[hostName].IPAddresses[netNameLower] + ip, network, err := net.ParseCIDR(ipCidr) + if err != nil { + cond.Message = fmt.Sprintf("Error parsing IP CIDR %v for network %v", ipCidr, netNameLower) + cond.Reason = shared.CommonCondReasonOSNetError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return nil, nil, err + } + + netMask := network.Mask + + // Network data cloud-init secret + templateParameters := make(map[string]interface{}) + templateParameters["CtlplaneIp"] = ip.String() + templateParameters["CtlplaneInterface"] = instance.Spec.CtlplaneInterface + templateParameters["CtlplaneGateway"] = ctlPlaneNetwork.Spec.Gateway + templateParameters["CtlplaneNetmask"] = net.IP(netMask).String() + if len(instance.Spec.BootstrapDNS) > 0 { + templateParameters["CtlplaneDns"] = instance.Spec.BootstrapDNS + } else { + templateParameters["CtlplaneDns"] = osNetCfg.Spec.DNSServers + } + + if len(instance.Spec.DNSSearchDomains) > 0 { + templateParameters["CtlplaneDnsSearch"] = instance.Spec.DNSSearchDomains + } else { + templateParameters["CtlplaneDnsSearch"] = osNetCfg.Spec.DNSSearchDomains + } + + routes := []map[string]string{} + for _, route := range ctlPlaneNetwork.Spec.Routes { + _, routeNetwork, err := net.ParseCIDR(route.Destination) + if err != nil { + cond.Message = fmt.Sprintf("Error parsing route CIDR %v for network %v", route.Destination, netNameLower) + cond.Reason = shared.CommonCondReasonOSNetError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return nil, nil, err + } + routes = append(routes, map[string]string{"network": routeNetwork.IP.String(), "netmask": net.IP(routeNetwork.Mask).String(), "gateway": route.Nexthop}) + } + + templateParameters["CtlplaneRoutes"] = routes + + networkDataSecretName := fmt.Sprintf(baremetalset.CloudInitNetworkDataSecretName, instance.Name, bmh.Name) + + // Flag the network data secret as safe to collect with must-gather + secretLabelsWithMustGather := common.GetLabels(instance, baremetalset.AppLabel, map[string]string{ + common.MustGatherSecret: "yes", + }) + + networkDataSt := common.Template{ + Name: networkDataSecretName, + Namespace: "openshift-machine-api", + Type: common.TemplateTypeConfig, + InstanceType: instance.Kind, + AdditionalTemplate: map[string]string{"networkData": "/baremetalset/cloudinit/networkdata"}, + Labels: secretLabelsWithMustGather, + ConfigOptions: templateParameters, + } + + networkDataSecretRef = &corev1.SecretReference{ + Name: networkDataSecretName, + Namespace: "openshift-machine-api", + } + + sts = append(sts, networkDataSt) + } + + // If we have any auto-generated secrets to create/update, do so now + if len(sts) > 0 { + err := common.EnsureSecrets(ctx, r, instance, sts, &map[string]common.EnvSetter{}) + if err != nil { + cond.Message = "Error creating metal3 cloud-init secrets" + cond.Reason = shared.BaremetalHostCondReasonCloudInitSecretError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return nil, nil, err + } + } + + return userDataSecretRef, networkDataSecretRef, nil +} + // Deprovision a BaremetalHost via Metal3 (and delete its bootstrapping secret) func (r *OpenStackBaremetalSetReconciler) baremetalHostDeprovision( ctx context.Context, @@ -1159,6 +1316,17 @@ func (r *OpenStackBaremetalSetReconciler) baremetalHostDeprovision( instance, ) + // Clean-up cloud-init secrets. If they were provided by the user, we only + // remove the "must-gather" label from them. If we auto-generated them during + // provisioning (which is indicated by the "clearUserData" and "clearNetworkData" + // bools returned from "r.cloudInitDeprovision"), then we delete them and also + // remove the reference to them from the BMH. + clearUserData, clearNetworkData, err := r.cloudInitDeprovision(ctx, instance, cond, baremetalHost) + + if err != nil { + return "", err + } + // Remove our ownership labels labels := baremetalHost.GetObjectMeta().GetLabels() ospHostname := labels[common.OSPHostnameLabelSelector] @@ -1179,8 +1347,18 @@ func (r *OpenStackBaremetalSetReconciler) baremetalHostDeprovision( baremetalHost.Spec.Online = false baremetalHost.Spec.ConsumerRef = nil baremetalHost.Spec.Image = nil - baremetalHost.Spec.UserData = nil - baremetalHost.Spec.NetworkData = nil + + if clearUserData { + // Only set "UserData" to nil if it was not manually set by the user, + // which this bool flag indicates + baremetalHost.Spec.UserData = nil + } + if clearNetworkData { + // Only set "NetworkData" to nil if it was not manually set by the user, + // which this bool flag indicates + baremetalHost.Spec.NetworkData = nil + } + err = r.Update(ctx, baremetalHost) if err != nil { cond.Message = fmt.Sprintf("Failed to update %s %s", baremetalHost.Kind, baremetalHost.Name) @@ -1191,12 +1369,122 @@ func (r *OpenStackBaremetalSetReconciler) baremetalHostDeprovision( } r.Log.Info(fmt.Sprintf("BaremetalHost deleted: bmh %s - osp name %s", baremetalHost.Name, ospHostname)) - // Also remove userdata and networkdata secrets - for _, secret := range []string{ - fmt.Sprintf(baremetalset.CloudInitUserDataSecretName, instance.Name, bmh.HostRef), - fmt.Sprintf(baremetalset.CloudInitNetworkDataSecretName, instance.Name, bmh.HostRef), - } { - err = common.DeleteSecretsWithName( + // Set status (remove this BaremetalHost entry) + delete(instance.Status.BaremetalHosts, bmh.Hostname) + + return ospHostname, nil +} + +// Deletes user data and network data cloud init secrets for a BMH if those secrets +// have our labels on them. If the secrets do not have our labels, then it means +// the user created them manually and that we should therefore leave them alone apart +// from removing our "must-gather" label that we added during BMH provisioning. +func (r *OpenStackBaremetalSetReconciler) cloudInitDeprovision(ctx context.Context, + instance *ospdirectorv1beta1.OpenStackBaremetalSet, + cond *shared.Condition, + bmh *metal3v1.BareMetalHost) (bool, bool, error) { + + secretsToDelete := []string{} + // Return these values as true to indicate to the caller that + // the associated BMH's ".spec.UserData" and ".spec.NetworkData" + // should be set to nil + clearUserData := true + clearNetworkData := true + + // In-line function used to remove our "must-gather" label from user-provided cloud-init secrets + updateUserProvidedCloudInitSecret := func(secret *corev1.Secret) error { + + labels := secret.GetLabels() + delete(labels, common.MustGatherSecret) + + op, err := controllerutil.CreateOrPatch(ctx, r.GetClient(), secret, func() error { + secret.SetLabels(labels) + return nil + }) + if err != nil { + cond.Message = fmt.Sprintf("Error removing must-gather label on cloud-init secret %s in namespace %s for BMH %s during deprovisioning", + bmh.Spec.UserData.Name, bmh.Spec.UserData.Namespace, bmh.Name) + cond.Reason = shared.CommonCondReasonSecretError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return err + } + if op != controllerutil.OperationResultNone { + r.Log.Info(fmt.Sprintf("Cloud-init secret %s %s to remove must-gather label in namespace %s for BMH %s during deprovisioning", + bmh.Spec.UserData.Name, op, bmh.Spec.UserData.Namespace, bmh.Name)) + } + + return nil + } + + // Check user data cloud-init secret. If it was created by the user, we + // should not delete it and instead just remove our "must-gather" label + if bmh.Spec.UserData != nil { + // Get the secret to check its labels + userDataSecret, _, err := common.GetSecret(ctx, r, bmh.Spec.UserData.Name, bmh.Spec.UserData.Namespace) + + if err != nil { + cond.Message = fmt.Sprintf("Error getting user data secret %s in namespace %s for BMH %s during deprovisioning", + bmh.Spec.UserData.Name, bmh.Spec.UserData.Namespace, bmh.Name) + cond.Reason = shared.CommonCondReasonSecretError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return false, false, err + } + + if userDataSecret.Labels[common.OwnerControllerNameLabelSelector] == baremetalset.AppLabel { + // Our labels are present on the secret (which we would expect to find if + // we had automatically generated it in during provisioning in a previous + // reconciliation), so this cloud-init secret was NOT added by the user + // and SHOULD be deleted + secretsToDelete = append(secretsToDelete, fmt.Sprintf(baremetalset.CloudInitUserDataSecretName, instance.Name, bmh.Name)) + } else { + // This is a user-provided cloud-init secret, so we don't delete it. However, + // we still need to remove our "must-gather" label from it + clearUserData = false + err = updateUserProvidedCloudInitSecret(userDataSecret) + + if err != nil { + return false, false, err + } + } + } + + // Check network data cloud-init secret. If it was created by the user, we + // should not delete it and instead just remove our "must-gather" label + if bmh.Spec.NetworkData != nil { + // Get the secret to check its labels + networkDataSecret, _, err := common.GetSecret(ctx, r, bmh.Spec.NetworkData.Name, bmh.Spec.NetworkData.Namespace) + + if err != nil { + cond.Message = fmt.Sprintf("Error getting network data secret %s in namespace %s for BMH %s during deprovisioning", + bmh.Spec.NetworkData.Name, bmh.Spec.NetworkData.Namespace, bmh.Name) + cond.Reason = shared.CommonCondReasonSecretError + cond.Type = shared.CommonCondTypeError + err = common.WrapErrorForObject(cond.Message, instance, err) + return false, false, err + } + + if networkDataSecret.Labels[common.OwnerControllerNameLabelSelector] == baremetalset.AppLabel { + // Our labels are present on the secret (which we would expect to find if + // we had automatically generated it in a previous reconciliation), so this + // cloud-init secret was NOT added by the user and SHOULD be deleted + secretsToDelete = append(secretsToDelete, fmt.Sprintf(baremetalset.CloudInitNetworkDataSecretName, instance.Name, bmh.Name)) + } else { + // This is a user-provided cloud-init secret, so we don't delete it. However, + // we still need to remove our "must-gather" label from it + clearNetworkData = false + err = updateUserProvidedCloudInitSecret(networkDataSecret) + + if err != nil { + return false, false, err + } + } + } + + // Also potentially delete userdata and networkdata secrets + for _, secret := range secretsToDelete { + err := common.DeleteSecretsWithName( ctx, r, cond, @@ -1204,16 +1492,13 @@ func (r *OpenStackBaremetalSetReconciler) baremetalHostDeprovision( "openshift-machine-api", ) if err != nil { - return ospHostname, err + return false, false, err } - r.Log.Info(fmt.Sprintf("Network data secret deleted: name %s", secret)) + r.Log.Info(fmt.Sprintf("Cloud-init data secret deleted: name %s", secret)) } - // Set status (remove this BaremetalHost entry) - delete(instance.Status.BaremetalHosts, bmh.Hostname) - - return ospHostname, nil + return clearUserData, clearNetworkData, nil } // Deprovision all associated BaremetalHosts for this OpenStackBaremetalSet via Metal3 diff --git a/pkg/baremetalset/const.go b/pkg/baremetalset/const.go index 13118ff9..b3bf92d3 100644 --- a/pkg/baremetalset/const.go +++ b/pkg/baremetalset/const.go @@ -20,9 +20,6 @@ const ( // AppLabel - AppLabel = "osp-baremetalset" - // BaremetalHostRemovalAnnotation - Annotation key placed on BaremetalHost resources to target them for OpenStackBaremetalSet scale-down - BaremetalHostRemovalAnnotation = "osp-director.openstack.org/baremetalset-delete-baremetalhost" - // CloudInitUserDataSecretName - Naming template used for generating BaremetalHost cloudinit userdata secrets CloudInitUserDataSecretName = "%s-cloudinit-userdata-%s" // CloudInitNetworkDataSecretName - Naming template used for generating BaremetalHost cloudinit networkdata secrets