From 20c67ff77562bcd3c8cda1e320036f7c208c96ce Mon Sep 17 00:00:00 2001 From: Jacob Anders Date: Wed, 1 Apr 2026 13:11:56 +1000 Subject: [PATCH 1/4] Fix HFS setCondition ignoring ObservedGeneration changes The setCondition function in the HostFirmwareSettings controller was checking whether the condition changed by comparing against the old info.hfs.Status.Conditions rather than the newly updated status. When only ObservedGeneration changed but the condition status remained the same, the function returned false (no change), causing the status update to be skipped. This left ObservedGeneration permanently stale, blocking the BMH controller's generation-based consistency checks. Fix by using the return value of meta.SetStatusCondition, which correctly detects ObservedGeneration changes. Also remove the unused info parameter from setCondition since it is no longer needed. Assisted-By: Claude Opus 4.6 Signed-off-by: Jacob Anders --- .../hostfirmwaresettings_controller.go | 19 +++++--------- .../metal3.io/hostfirmwaresettings_test.go | 26 +++++++++++++++++++ 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/internal/controller/metal3.io/hostfirmwaresettings_controller.go b/internal/controller/metal3.io/hostfirmwaresettings_controller.go index 7cc291120f..84390893f2 100644 --- a/internal/controller/metal3.io/hostfirmwaresettings_controller.go +++ b/internal/controller/metal3.io/hostfirmwaresettings_controller.go @@ -247,7 +247,7 @@ func (r *HostFirmwareSettingsReconciler) updateStatus(ctx context.Context, info generation := info.hfs.GetGeneration() if specMismatch { - if setCondition(generation, &newStatus, info, metal3api.FirmwareSettingsChangeDetected, metav1.ConditionTrue, reason, "") { + if setCondition(generation, &newStatus, metal3api.FirmwareSettingsChangeDetected, metav1.ConditionTrue, reason, "") { dirty = true } @@ -255,7 +255,7 @@ func (r *HostFirmwareSettingsReconciler) updateStatus(ctx context.Context, info // Eventually this will be handled by a webhook errors := r.validateHostFirmwareSettings(info, &newStatus, schema) if len(errors) == 0 { - if setCondition(generation, &newStatus, info, metal3api.FirmwareSettingsValid, metav1.ConditionTrue, reason, "") { + if setCondition(generation, &newStatus, metal3api.FirmwareSettingsValid, metav1.ConditionTrue, reason, "") { dirty = true } } else { @@ -266,15 +266,15 @@ func (r *HostFirmwareSettingsReconciler) updateStatus(ctx context.Context, info } } reason = reasonConfigurationError - if setCondition(generation, &newStatus, info, metal3api.FirmwareSettingsValid, metav1.ConditionFalse, reason, "Invalid BIOS setting") { + if setCondition(generation, &newStatus, metal3api.FirmwareSettingsValid, metav1.ConditionFalse, reason, "Invalid BIOS setting") { dirty = true } } } else { - if setCondition(generation, &newStatus, info, metal3api.FirmwareSettingsValid, metav1.ConditionTrue, reason, "") { + if setCondition(generation, &newStatus, metal3api.FirmwareSettingsValid, metav1.ConditionTrue, reason, "") { dirty = true } - if setCondition(generation, &newStatus, info, metal3api.FirmwareSettingsChangeDetected, metav1.ConditionFalse, reason, "") { + if setCondition(generation, &newStatus, metal3api.FirmwareSettingsChangeDetected, metav1.ConditionFalse, reason, "") { dirty = true } } @@ -425,7 +425,7 @@ func (r *HostFirmwareSettingsReconciler) publishEvent(ctx context.Context, reque } } -func setCondition(generation int64, status *metal3api.HostFirmwareSettingsStatus, info *rInfo, +func setCondition(generation int64, status *metal3api.HostFirmwareSettingsStatus, cond metal3api.SettingsConditionType, newStatus metav1.ConditionStatus, reason conditionReason, message string) bool { newCondition := metav1.Condition{ @@ -435,13 +435,8 @@ func setCondition(generation int64, status *metal3api.HostFirmwareSettingsStatus Reason: string(reason), Message: message, } - meta.SetStatusCondition(&status.Conditions, newCondition) - currCond := meta.FindStatusCondition(info.hfs.Status.Conditions, string(cond)) - if currCond == nil || currCond.Status != newStatus { - return true - } - return false + return meta.SetStatusCondition(&status.Conditions, newCondition) } // Generate a name based on the schema key and values which should be the same for similar hardware. diff --git a/internal/controller/metal3.io/hostfirmwaresettings_test.go b/internal/controller/metal3.io/hostfirmwaresettings_test.go index e5eb930a69..c852829a28 100644 --- a/internal/controller/metal3.io/hostfirmwaresettings_test.go +++ b/internal/controller/metal3.io/hostfirmwaresettings_test.go @@ -540,6 +540,32 @@ func TestStoreHostFirmwareSettings(t *testing.T) { } } +func TestSetConditionObservedGeneration(t *testing.T) { + status := &metal3api.HostFirmwareSettingsStatus{} + + changed := setCondition(1, status, + metal3api.FirmwareSettingsValid, metav1.ConditionTrue, + reasonSuccess, "") + assert.True(t, changed, "first call should report a change") + + cond := status.Conditions[0] + assert.Equal(t, int64(1), cond.ObservedGeneration) + + changed = setCondition(1, status, + metal3api.FirmwareSettingsValid, metav1.ConditionTrue, + reasonSuccess, "") + assert.False(t, changed, "same generation and status should not report a change") + + // This is the scenario that was broken: generation bumps but status + // stays the same. The old code compared only Status and missed the + // ObservedGeneration difference, returning false. + changed = setCondition(2, status, + metal3api.FirmwareSettingsValid, metav1.ConditionTrue, + reasonSuccess, "") + assert.True(t, changed, "generation change alone must report a change") + assert.Equal(t, int64(2), status.Conditions[0].ObservedGeneration) +} + // Test the function to validate hostFirmwareSettings. func TestValidateHostFirmwareSettings(t *testing.T) { testCases := []struct { From 32ee653e7f16d1651cb1b63e94a6fcd45d72537e Mon Sep 17 00:00:00 2001 From: Jacob Anders Date: Wed, 1 Apr 2026 13:25:58 +1000 Subject: [PATCH 2/4] Handle generation mismatch in servicing error recovery When a user clears HFC/HFS specs to abort a failed servicing operation, the BMH controller calls getHostFirmwareSettings/Components which check that metadata.generation matches status.conditions.observedGeneration. Since the HFS/HFC sub-controllers may not have reconciled yet after the spec change, this generation mismatch returns an error that blocks the abort path, leaving the host stuck in ServicingError with exponential backoff. Fix with two complementary changes: 1. Add a fast-path in doServiceIfNeeded: when in ServicingError state, check specs directly via lightweight r.Get() calls (which don't validate generation) before the generation-sensitive functions. If specs are cleared, abort immediately via prov.Service() and clear the error/operational status on success. 2. Watch HostFirmwareSettings and HostFirmwareComponents for generation changes (spec modifications), mapping events to the corresponding BMH. This ensures the BMH controller reconciles promptly when specs change, rather than waiting for error-state exponential backoff. Assisted-By: Claude Opus 4.6 Signed-off-by: Jacob Anders --- .../metal3.io/baremetalhost_controller.go | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/internal/controller/metal3.io/baremetalhost_controller.go b/internal/controller/metal3.io/baremetalhost_controller.go index 9d032f1069..ee514d3723 100644 --- a/internal/controller/metal3.io/baremetalhost_controller.go +++ b/internal/controller/metal3.io/baremetalhost_controller.go @@ -47,6 +47,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/predicate" ) @@ -1474,6 +1475,42 @@ func (r *BareMetalHostReconciler) doServiceIfNeeded(ctx context.Context, prov pr liveFirmwareUpdatesAllowed = (hup.Spec.FirmwareUpdates == metal3api.HostUpdatePolicyOnReboot) } + // Fast-path: when recovering from a servicing error, check if specs are + // cleared before calling getHostFirmwareSettings/Components. Those + // functions fail on generation mismatch (sub-controller hasn't reconciled + // yet), which would block the abort/recovery path indefinitely. + if info.host.Status.ErrorType == metal3api.ServicingError { + specsExist := false + if liveFirmwareSettingsAllowed { + hfsCheck := &metal3api.HostFirmwareSettings{} + if err := r.Get(ctx, info.request.NamespacedName, hfsCheck); err == nil && len(hfsCheck.Spec.Settings) > 0 { + specsExist = true + } + } + if liveFirmwareUpdatesAllowed && !specsExist { + hfcCheck := &metal3api.HostFirmwareComponents{} + if err := r.Get(ctx, info.request.NamespacedName, hfcCheck); err == nil && len(hfcCheck.Spec.Updates) > 0 { + specsExist = true + } + } + if !specsExist { + info.log.Info("specs cleared while in servicing error state, attempting abort") + provResult, _, err := prov.Service(ctx, servicingData, false, false) + if err != nil { + return actionError{fmt.Errorf("failed to abort servicing: %w", err)} + } + if provResult.ErrorMessage != "" { + return actionError{fmt.Errorf("failed to abort servicing: %s", provResult.ErrorMessage)} + } + if provResult.Dirty { + return actionContinue{provResult.RequeueAfter} + } + info.log.Info("successfully recovered from servicing error") + clearErrorWithStatus(info.host, metal3api.OperationalStatusOK) + return actionComplete{} + } + } + if liveFirmwareSettingsAllowed { // handling pre-HFS FirmwareSettings here if !reflect.DeepEqual(info.host.Status.Provisioning.Firmware, info.host.Spec.Firmware) { @@ -2538,6 +2575,22 @@ func (r *BareMetalHostReconciler) SetupWithManager(mgr ctrl.Manager, preprovImgE controller.Owns(&metal3api.PreprovisioningImage{}) } + // Watch HFC/HFS for spec changes (generation bumps) so that the BMH + // controller reconciles promptly when a user clears firmware specs, + // rather than waiting for error-state exponential backoff to expire. + firmwareEventHandler := handler.EnqueueRequestsFromMapFunc( + func(_ context.Context, obj client.Object) []ctrl.Request { + return []ctrl.Request{{NamespacedName: client.ObjectKey{ + Name: obj.GetName(), + Namespace: obj.GetNamespace(), + }}} + }, + ) + controller.Watches(&metal3api.HostFirmwareSettings{}, firmwareEventHandler, + builder.WithPredicates(predicate.GenerationChangedPredicate{})) + controller.Watches(&metal3api.HostFirmwareComponents{}, firmwareEventHandler, + builder.WithPredicates(predicate.GenerationChangedPredicate{})) + return controller.Complete(r) } From 16dd5287d42a5eff148b24001d897a08cebccd39 Mon Sep 17 00:00:00 2001 From: Jacob Anders Date: Mon, 25 May 2026 22:51:44 +1000 Subject: [PATCH 3/4] Harden servicing abort fast-path and add tests Extract hostFirmwareSpecsExist to handle Get errors safely, include legacy Spec.Firmware, and gate recovery on active HFS/HFC policy paths. Add unit tests and an e2e case for aborting servicing by clearing HFS spec. --- .../metal3.io/baremetalhost_controller.go | 68 ++- .../baremetalhost_controller_test.go | 101 ++++ test/e2e/firmware_settings_test.go | 571 ++++++++++++++++++ 3 files changed, 726 insertions(+), 14 deletions(-) create mode 100644 test/e2e/firmware_settings_test.go diff --git a/internal/controller/metal3.io/baremetalhost_controller.go b/internal/controller/metal3.io/baremetalhost_controller.go index ee514d3723..68f19a6857 100644 --- a/internal/controller/metal3.io/baremetalhost_controller.go +++ b/internal/controller/metal3.io/baremetalhost_controller.go @@ -1458,6 +1458,51 @@ func (r *BareMetalHostReconciler) actionDeprovisioning(ctx context.Context, prov return actionComplete{} } +// hostFirmwareSpecsExist reports whether firmware servicing specs are still present +// on the host or related HFS/HFC objects. Used by the ServicingError recovery fast-path +// before calling getHostFirmwareSettings/Components, which block on generation mismatch. +// Only NotFound errors from Get are treated as absent resources; other errors are returned. +func (r *BareMetalHostReconciler) hostFirmwareSpecsExist( + ctx context.Context, + info *reconcileInfo, + liveFirmwareSettingsAllowed, liveFirmwareUpdatesAllowed bool, +) (bool, error) { + specsExist := false + + if liveFirmwareSettingsAllowed { + if !reflect.DeepEqual(info.host.Status.Provisioning.Firmware, info.host.Spec.Firmware) && + info.host.Spec.Firmware != nil { + specsExist = true + } + + if !specsExist { + hfsCheck := &metal3api.HostFirmwareSettings{} + err := r.Get(ctx, info.request.NamespacedName, hfsCheck) + if err != nil { + if !k8serrors.IsNotFound(err) { + return false, fmt.Errorf("could not load host firmware settings: %w", err) + } + } else if len(hfsCheck.Spec.Settings) > 0 { + specsExist = true + } + } + } + + if liveFirmwareUpdatesAllowed { + hfcCheck := &metal3api.HostFirmwareComponents{} + err := r.Get(ctx, info.request.NamespacedName, hfcCheck) + if err != nil { + if !k8serrors.IsNotFound(err) { + return false, fmt.Errorf("could not load host firmware components: %w", err) + } + } else if len(hfcCheck.Spec.Updates) > 0 { + specsExist = true + } + } + + return specsExist, nil +} + func (r *BareMetalHostReconciler) doServiceIfNeeded(ctx context.Context, prov provisioner.Provisioner, info *reconcileInfo, hup *metal3api.HostUpdatePolicy) (result actionResult) { servicingData := provisioner.ServicingData{} @@ -1479,19 +1524,11 @@ func (r *BareMetalHostReconciler) doServiceIfNeeded(ctx context.Context, prov pr // cleared before calling getHostFirmwareSettings/Components. Those // functions fail on generation mismatch (sub-controller hasn't reconciled // yet), which would block the abort/recovery path indefinitely. - if info.host.Status.ErrorType == metal3api.ServicingError { - specsExist := false - if liveFirmwareSettingsAllowed { - hfsCheck := &metal3api.HostFirmwareSettings{} - if err := r.Get(ctx, info.request.NamespacedName, hfsCheck); err == nil && len(hfsCheck.Spec.Settings) > 0 { - specsExist = true - } - } - if liveFirmwareUpdatesAllowed && !specsExist { - hfcCheck := &metal3api.HostFirmwareComponents{} - if err := r.Get(ctx, info.request.NamespacedName, hfcCheck); err == nil && len(hfcCheck.Spec.Updates) > 0 { - specsExist = true - } + if info.host.Status.ErrorType == metal3api.ServicingError && + (liveFirmwareSettingsAllowed || liveFirmwareUpdatesAllowed) { + specsExist, err := r.hostFirmwareSpecsExist(ctx, info, liveFirmwareSettingsAllowed, liveFirmwareUpdatesAllowed) + if err != nil { + return actionError{fmt.Errorf("could not determine if firmware specs exist: %w", err)} } if !specsExist { info.log.Info("specs cleared while in servicing error state, attempting abort") @@ -1506,7 +1543,9 @@ func (r *BareMetalHostReconciler) doServiceIfNeeded(ctx context.Context, prov pr return actionContinue{provResult.RequeueAfter} } info.log.Info("successfully recovered from servicing error") - clearErrorWithStatus(info.host, metal3api.OperationalStatusOK) + if clearErrorWithStatus(info.host, metal3api.OperationalStatusOK) { + return actionUpdate{} + } return actionComplete{} } } @@ -2578,6 +2617,7 @@ func (r *BareMetalHostReconciler) SetupWithManager(mgr ctrl.Manager, preprovImgE // Watch HFC/HFS for spec changes (generation bumps) so that the BMH // controller reconciles promptly when a user clears firmware specs, // rather than waiting for error-state exponential backoff to expire. + // HFS and HFC use the same name and namespace as their BareMetalHost. firmwareEventHandler := handler.EnqueueRequestsFromMapFunc( func(_ context.Context, obj client.Object) []ctrl.Request { return []ctrl.Request{{NamespacedName: client.ObjectKey{ diff --git a/internal/controller/metal3.io/baremetalhost_controller_test.go b/internal/controller/metal3.io/baremetalhost_controller_test.go index 6efa0071fd..0ec372b4c9 100644 --- a/internal/controller/metal3.io/baremetalhost_controller_test.go +++ b/internal/controller/metal3.io/baremetalhost_controller_test.go @@ -3053,6 +3053,107 @@ func TestHostFirmwareSettings(t *testing.T) { } } +func TestHostFirmwareSpecsExist(t *testing.T) { + var trueVal = true + var falseVal = false + + host := newDefaultHost(t) + info := makeReconcileInfo(host) + info.request = newRequest(host) + + t.Run("legacy firmware spec pending", func(t *testing.T) { + host := host.DeepCopy() + host.Status.Provisioning.Firmware = &metal3api.FirmwareConfig{ + VirtualizationEnabled: &falseVal, + } + host.Spec.Firmware = &metal3api.FirmwareConfig{ + VirtualizationEnabled: &trueVal, + } + info := makeReconcileInfo(host) + info.request = newRequest(host) + r := newTestReconciler(t, host) + + exists, err := r.hostFirmwareSpecsExist(t.Context(), info, true, false) + require.NoError(t, err) + assert.True(t, exists) + }) + + t.Run("HFS with settings", func(t *testing.T) { + hfs := newHostFirmwareSettings(host, nil) + r := newTestReconciler(t, host, hfs) + + exists, err := r.hostFirmwareSpecsExist(t.Context(), info, true, false) + require.NoError(t, err) + assert.True(t, exists) + }) + + t.Run("HFS not found and no legacy spec", func(t *testing.T) { + r := newTestReconciler(t, host) + + exists, err := r.hostFirmwareSpecsExist(t.Context(), info, true, false) + require.NoError(t, err) + assert.False(t, exists) + }) + + t.Run("HFS empty settings", func(t *testing.T) { + hfs := newHostFirmwareSettings(host, nil) + hfs.Spec.Settings = nil + r := newTestReconciler(t, host, hfs) + + exists, err := r.hostFirmwareSpecsExist(t.Context(), info, true, false) + require.NoError(t, err) + assert.False(t, exists) + }) + + t.Run("HFC with updates", func(t *testing.T) { + hfc := &metal3api.HostFirmwareComponents{ + ObjectMeta: metav1.ObjectMeta{ + Name: host.Name, + Namespace: host.Namespace, + }, + Spec: metal3api.HostFirmwareComponentsSpec{ + Updates: []metal3api.FirmwareUpdate{ + {Component: "bmc", URL: "http://example.com/bmc"}, + }, + }, + } + r := newTestReconciler(t, host, hfc) + + exists, err := r.hostFirmwareSpecsExist(t.Context(), info, false, true) + require.NoError(t, err) + assert.True(t, exists) + }) + + t.Run("neither HFS nor HFC when both paths enabled", func(t *testing.T) { + r := newTestReconciler(t, host) + + exists, err := r.hostFirmwareSpecsExist(t.Context(), info, true, true) + require.NoError(t, err) + assert.False(t, exists) + }) + + t.Run("Get error is propagated", func(t *testing.T) { + r := newTestReconciler(t, host) + r.Client = &getErrorClient{Client: r.Client, err: fmt.Errorf("apiserver unavailable")} + + _, err := r.hostFirmwareSpecsExist(t.Context(), info, true, false) + require.Error(t, err) + assert.Contains(t, err.Error(), "could not load host firmware settings") + }) +} + +type getErrorClient struct { + client.Client + err error +} + +func (c *getErrorClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if c.err != nil { + return c.err + } + return c.Client.Get(ctx, key, obj, opts...) +} + func TestBMHTransitionToPreparing(t *testing.T) { var True = true var False = false diff --git a/test/e2e/firmware_settings_test.go b/test/e2e/firmware_settings_test.go new file mode 100644 index 0000000000..21f4d10bb0 --- /dev/null +++ b/test/e2e/firmware_settings_test.go @@ -0,0 +1,571 @@ +//go:build e2e +// +build e2e + +package e2e + +import ( + "context" + "path" + "strings" + + metal3api "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" + "sigs.k8s.io/cluster-api/test/framework" + "sigs.k8s.io/cluster-api/util/patch" +) + +// These example keys and values are hardcoded in sushy-tools. +// Warning: be careful when updating or reusing any keys: updated values will be persisted in the emulator between tests! +const ( + hfsTestKey1 = "ProcTurboMode" + hfsTestOrigValue1 = "Enabled" + hfsTestNewValue1 = "Disabled" + hfsTestKey2 = "EmbeddedSata" + hfsTestOrigValue2 = "Raid" + hfsTestNewValue2 = "Ata" +) + +var _ = Describe("Host Firmware Settings", Label("required", "firmware"), func() { + var ( + specName = "firmware-settings" + namespace *corev1.Namespace + cancelWatches context.CancelFunc + ) + + BeforeEach(func() { + // FIXME(dtantsur): find a more elegant way to check for this feature + if !e2eConfig.GetBoolVariable("DEPLOY_IRONIC") || !strings.Contains(bmc.Address, "redfish") { + Skip("HFS tests require a real Ironic and a host with Redfish") + } + + namespace, cancelWatches = framework.CreateNamespaceAndWatchEvents(ctx, framework.CreateNamespaceAndWatchEventsInput{ + Creator: clusterProxy.GetClient(), + ClientSet: clusterProxy.GetClientSet(), + Name: specName, + LogFolder: artifactFolder, + IgnoreAlreadyExists: true, + }) + }) + + It("should apply firmware settings created before the host, then apply a new value for available host", func() { + bmhName := specName + "-before-bmh" + secretName := bmhName + "-bmc" + + By("Creating a secret with BMH credentials") + bmcCredentialsData := map[string]string{ + "username": bmc.User, + "password": bmc.Password, + } + CreateSecret(ctx, clusterProxy.GetClient(), namespace.Name, secretName, bmcCredentialsData) + + By("Creating a HostFirmwareSettings with modified value before BMH") + hfs := &metal3api.HostFirmwareSettings{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmhName, + Namespace: namespace.Name, + }, + Spec: metal3api.HostFirmwareSettingsSpec{ + Settings: metal3api.DesiredSettingsMap{ + hfsTestKey1: intstr.FromString(hfsTestNewValue1), + }, + }, + } + Expect(clusterProxy.GetClient().Create(ctx, hfs)).To(Succeed()) + + By("Creating a BMH with inspection and cleaning disabled") + bmh := metal3api.BareMetalHost{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmhName, + Namespace: namespace.Name, + }, + Spec: metal3api.BareMetalHostSpec{ + Online: true, + BMC: metal3api.BMCDetails{ + Address: bmc.Address, + CredentialsName: secretName, + DisableCertificateVerification: bmc.DisableCertificateVerification, + }, + BootMode: metal3api.BootMode(e2eConfig.GetVariable("BOOT_MODE")), + BootMACAddress: bmc.BootMacAddress, + AutomatedCleaningMode: metal3api.CleaningModeDisabled, + InspectionMode: metal3api.InspectionModeDisabled, + }, + } + Expect(clusterProxy.GetClient().Create(ctx, &bmh)).To(Succeed()) + + By("Waiting for the BMH to start preparing") + WaitForBmhInProvisioningState(ctx, WaitForBmhInProvisioningStateInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.StatePreparing, + }, e2eConfig.GetIntervals(specName, "wait-available")...) + + By("Waiting for the BMH to become available") + WaitForBmhInProvisioningState(ctx, WaitForBmhInProvisioningStateInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.StateAvailable, + }, e2eConfig.GetIntervals(specName, "wait-firmware-settings")...) + + By("Verifying the firmware setting was applied in HFS status") + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)).To(Succeed()) + Expect(hfs.Status.Settings).To(HaveKeyWithValue(hfsTestKey1, hfsTestNewValue1)) + Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsValid), metav1.ConditionTrue)) + Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsChangeDetected), metav1.ConditionFalse)) + + By("Checking firmware schema") + fSchema := &metal3api.FirmwareSchema{} + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: hfs.Status.FirmwareSchema.Name, + Namespace: hfs.Status.FirmwareSchema.Namespace, + }, fSchema)).To(Succeed()) + Expect(fSchema.Spec.Schema).To(HaveKey(hfsTestKey1)) + Expect(fSchema.Spec.Schema).To(HaveKey(hfsTestKey2)) + + By("Deleting the BMH to get rid of cached values") + Expect(clusterProxy.GetClient().Delete(ctx, &bmh)).To(Succeed()) + + By("Waiting for the BMH to be deleted") + WaitForBmhDeleted(ctx, WaitForBmhDeletedInput{ + Client: clusterProxy.GetClient(), + BmhName: bmhName, + Namespace: namespace.Name, + }, e2eConfig.GetIntervals(specName, "wait-bmh-deleted")...) + + By("Making sure HFS was deleted too") + Eventually(func() bool { + return k8serrors.IsNotFound(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)) + }, e2eConfig.GetIntervals(specName, "wait-bmh-deleted")...).Should(BeTrue()) + + By("Creating a secret with BMH credentials again") + CreateSecret(ctx, clusterProxy.GetClient(), namespace.Name, secretName, bmcCredentialsData) + + By("Re-creating the BMH to check that the settings were saved in the backend") + bmh = metal3api.BareMetalHost{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmhName, + Namespace: namespace.Name, + }, + Spec: metal3api.BareMetalHostSpec{ + Online: true, + BMC: metal3api.BMCDetails{ + Address: bmc.Address, + CredentialsName: secretName, + DisableCertificateVerification: bmc.DisableCertificateVerification, + }, + BootMode: metal3api.BootMode(e2eConfig.GetVariable("BOOT_MODE")), + BootMACAddress: bmc.BootMacAddress, + AutomatedCleaningMode: metal3api.CleaningModeDisabled, + InspectionMode: metal3api.InspectionModeDisabled, + }, + } + Expect(clusterProxy.GetClient().Create(ctx, &bmh)).To(Succeed()) + + By("Waiting for the BMH to become available") + WaitForBmhInProvisioningState(ctx, WaitForBmhInProvisioningStateInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.StateAvailable, + }, e2eConfig.GetIntervals(specName, "wait-available")...) + + By("Verifying the updated firmware setting in HFS status") + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)).To(Succeed()) + Expect(hfs.Status.Settings).To(HaveKeyWithValue(hfsTestKey1, hfsTestNewValue1)) + Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsValid), metav1.ConditionTrue)) + Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsChangeDetected), metav1.ConditionFalse)) + + By("Updating HostFirmwareSettings with the original value") + helper, err := patch.NewHelper(hfs, clusterProxy.GetClient()) + Expect(err).NotTo(HaveOccurred()) + hfs.Spec.Settings = metal3api.DesiredSettingsMap{ + hfsTestKey1: intstr.FromString(hfsTestOrigValue1), + } + Expect(helper.Patch(ctx, hfs)).To(Succeed()) + + By("Verifying the conditions on firmware setting") + Eventually(func(g Gomega) { + g.Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)).To(Succeed()) + g.Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsValid), metav1.ConditionTrue)) + g.Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsChangeDetected), metav1.ConditionTrue)) + }, e2eConfig.GetIntervals(specName, "wait-reconcile")...).Should(Succeed()) + + By("Waiting for the BMH to start preparing") + WaitForBmhInProvisioningState(ctx, WaitForBmhInProvisioningStateInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.StatePreparing, + }, e2eConfig.GetIntervals(specName, "wait-available")...) + + By("Waiting for the BMH to become available") + WaitForBmhInProvisioningState(ctx, WaitForBmhInProvisioningStateInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.StateAvailable, + }, e2eConfig.GetIntervals(specName, "wait-firmware-settings")...) + + By("Verifying the firmware setting was applied in HFS status") + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)).To(Succeed()) + Expect(hfs.Status.Settings).To(HaveKeyWithValue(hfsTestKey1, hfsTestOrigValue1)) + Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsValid), metav1.ConditionTrue)) + Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsChangeDetected), metav1.ConditionFalse)) + + By("Deleting the BMH") + Expect(clusterProxy.GetClient().Delete(ctx, &bmh)).To(Succeed()) + + By("Waiting for the BMH to be deleted") + WaitForBmhDeleted(ctx, WaitForBmhDeletedInput{ + Client: clusterProxy.GetClient(), + BmhName: bmhName, + Namespace: namespace.Name, + }, e2eConfig.GetIntervals(specName, "wait-bmh-deleted")...) + }) + + It("should update firmware settings on a provisioned host via servicing", func() { + bmhName := specName + "-servicing" + secretName := bmhName + "-bmc" + + By("Creating a secret with BMH credentials") + bmcCredentialsData := map[string]string{ + "username": bmc.User, + "password": bmc.Password, + } + CreateSecret(ctx, clusterProxy.GetClient(), namespace.Name, secretName, bmcCredentialsData) + + By("Creating a BMH with inspection and cleaning disabled") + bmh := metal3api.BareMetalHost{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmhName, + Namespace: namespace.Name, + }, + Spec: metal3api.BareMetalHostSpec{ + Online: true, + BMC: metal3api.BMCDetails{ + Address: bmc.Address, + CredentialsName: secretName, + DisableCertificateVerification: bmc.DisableCertificateVerification, + }, + BootMode: metal3api.BootMode(e2eConfig.GetVariable("BOOT_MODE")), + BootMACAddress: bmc.BootMacAddress, + AutomatedCleaningMode: metal3api.CleaningModeDisabled, + InspectionMode: metal3api.InspectionModeDisabled, + }, + } + Expect(clusterProxy.GetClient().Create(ctx, &bmh)).To(Succeed()) + + By("Waiting for the BMH to become available") + WaitForBmhInProvisioningState(ctx, WaitForBmhInProvisioningStateInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.StateAvailable, + }, e2eConfig.GetIntervals(specName, "wait-available")...) + + By("Checking the original value of the parameter") + origHFS := &metal3api.HostFirmwareSettings{} + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, origHFS)).To(Succeed()) + Expect(origHFS.Status.Settings).To(HaveKeyWithValue(hfsTestKey2, hfsTestOrigValue2)) + + By("Checking firmware schema") + fSchema := &metal3api.FirmwareSchema{} + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: origHFS.Status.FirmwareSchema.Name, + Namespace: origHFS.Status.FirmwareSchema.Namespace, + }, fSchema)).To(Succeed()) + Expect(fSchema.Spec.Schema).To(HaveKey(hfsTestKey1)) + Expect(fSchema.Spec.Schema).To(HaveKey(hfsTestKey2)) + + By("Provisioning the BMH") + var userDataSecret *corev1.SecretReference + if e2eConfig.GetVariable("SSH_CHECK_PROVISIONED") == "true" { + userDataSecretName := "user-data" + sshPubKeyPath := e2eConfig.GetVariable("SSH_PUB_KEY") + createSSHSetupUserdata(ctx, clusterProxy.GetClient(), namespace.Name, userDataSecretName, sshPubKeyPath, bmc.IPAddress) + userDataSecret = &corev1.SecretReference{ + Name: userDataSecretName, + Namespace: namespace.Name, + } + } + Expect(PatchBMHForProvisioning(ctx, PatchBMHForProvisioningInput{ + client: clusterProxy.GetClient(), + bmh: &bmh, + bmc: bmc, + e2eConfig: e2eConfig, + namespace: namespace.Name, + userDataSecret: userDataSecret, + })).To(Succeed()) + + By("Waiting for the BMH to be provisioned") + WaitForBmhInProvisioningState(ctx, WaitForBmhInProvisioningStateInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.StateProvisioned, + }, e2eConfig.GetIntervals(specName, "wait-provisioned")...) + + By("Creating a HostUpdatePolicy to allow firmware changes on reboot") + hup := &metal3api.HostUpdatePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmhName, + Namespace: namespace.Name, + }, + Spec: metal3api.HostUpdatePolicySpec{ + FirmwareSettings: metal3api.HostUpdatePolicyOnReboot, + }, + } + Expect(clusterProxy.GetClient().Create(ctx, hup)).To(Succeed()) + + By("Updating HostFirmwareSettings with a new value") + hfs := &metal3api.HostFirmwareSettings{} + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)).To(Succeed()) + + helper, err := patch.NewHelper(hfs, clusterProxy.GetClient()) + Expect(err).NotTo(HaveOccurred()) + hfs.Spec.Settings = metal3api.DesiredSettingsMap{ + hfsTestKey2: intstr.FromString(hfsTestNewValue2), + } + Expect(helper.Patch(ctx, hfs)).To(Succeed()) + + By("Verifying the conditions on firmware setting") + Eventually(func(g Gomega) { + g.Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)).To(Succeed()) + g.Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsValid), metav1.ConditionTrue)) + g.Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsChangeDetected), metav1.ConditionTrue)) + }, e2eConfig.GetIntervals(specName, "wait-reconcile")...).Should(Succeed()) + + By("Triggering a reboot via annotation") + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, &bmh)).To(Succeed()) + AnnotateBmh(ctx, clusterProxy.GetClient(), bmh, metal3api.RebootAnnotationPrefix, ptr.To("")) + + By("Waiting for the BMH to enter servicing") + WaitForBmhInOperationalStatus(ctx, WaitForBmhInOperationalStatusInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.OperationalStatusServicing, + UndesiredStates: []metal3api.OperationalStatus{ + metal3api.OperationalStatusError, + }, + }, e2eConfig.GetIntervals(specName, "wait-servicing")...) + + By("Waiting for servicing to complete") + WaitForBmhInOperationalStatus(ctx, WaitForBmhInOperationalStatusInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.OperationalStatusOK, + UndesiredStates: []metal3api.OperationalStatus{ + metal3api.OperationalStatusError, + }, + }, e2eConfig.GetIntervals(specName, "wait-firmware-settings")...) + + By("Verifying the firmware setting was applied in HFS status") + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)).To(Succeed()) + Expect(hfs.Status.Settings).To(HaveKeyWithValue(hfsTestKey2, hfsTestNewValue2)) + Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsValid), metav1.ConditionTrue)) + Expect(hfs.Status.Conditions).To(ContainCondition(string(metal3api.FirmwareSettingsChangeDetected), metav1.ConditionFalse)) + + if e2eConfig.GetVariable("SSH_CHECK_PROVISIONED") == "true" { + By("Verifying the instance is still accessible via SSH") + PerformSSHBootCheck(e2eConfig, "disk", bmc.IPAddress) + } else { + Logf("WARNING: Skipping SSH check since SSH_CHECK_PROVISIONED != true") + } + + By("Deleting the BMH") + Expect(clusterProxy.GetClient().Delete(ctx, &bmh)).To(Succeed()) + + By("Waiting for the BMH to be deleted") + WaitForBmhDeleted(ctx, WaitForBmhDeletedInput{ + Client: clusterProxy.GetClient(), + BmhName: bmhName, + Namespace: namespace.Name, + }, e2eConfig.GetIntervals(specName, "wait-bmh-deleted")...) + + By("Making sure HFS was deleted too") + Eventually(func() bool { + return k8serrors.IsNotFound(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)) + }, e2eConfig.GetIntervals(specName, "wait-bmh-deleted")...).Should(BeTrue()) + }) + + It("should abort servicing when HostFirmwareSettings spec is cleared during servicing", func() { + bmhName := specName + "-servicing-abort" + secretName := bmhName + "-bmc" + + By("Creating a secret with BMH credentials") + bmcCredentialsData := map[string]string{ + "username": bmc.User, + "password": bmc.Password, + } + CreateSecret(ctx, clusterProxy.GetClient(), namespace.Name, secretName, bmcCredentialsData) + + By("Creating a BMH with inspection and cleaning disabled") + bmh := metal3api.BareMetalHost{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmhName, + Namespace: namespace.Name, + }, + Spec: metal3api.BareMetalHostSpec{ + Online: true, + BMC: metal3api.BMCDetails{ + Address: bmc.Address, + CredentialsName: secretName, + DisableCertificateVerification: bmc.DisableCertificateVerification, + }, + BootMode: metal3api.BootMode(e2eConfig.GetVariable("BOOT_MODE")), + BootMACAddress: bmc.BootMacAddress, + AutomatedCleaningMode: metal3api.CleaningModeDisabled, + InspectionMode: metal3api.InspectionModeDisabled, + }, + } + Expect(clusterProxy.GetClient().Create(ctx, &bmh)).To(Succeed()) + + By("Waiting for the BMH to become available") + WaitForBmhInProvisioningState(ctx, WaitForBmhInProvisioningStateInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.StateAvailable, + }, e2eConfig.GetIntervals(specName, "wait-available")...) + + By("Provisioning the BMH") + Expect(PatchBMHForProvisioning(ctx, PatchBMHForProvisioningInput{ + client: clusterProxy.GetClient(), + bmh: &bmh, + bmc: bmc, + e2eConfig: e2eConfig, + namespace: namespace.Name, + })).To(Succeed()) + + By("Waiting for the BMH to be provisioned") + WaitForBmhInProvisioningState(ctx, WaitForBmhInProvisioningStateInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.StateProvisioned, + }, e2eConfig.GetIntervals(specName, "wait-provisioned")...) + + By("Creating a HostUpdatePolicy to allow firmware changes on reboot") + hup := &metal3api.HostUpdatePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmhName, + Namespace: namespace.Name, + }, + Spec: metal3api.HostUpdatePolicySpec{ + FirmwareSettings: metal3api.HostUpdatePolicyOnReboot, + }, + } + Expect(clusterProxy.GetClient().Create(ctx, hup)).To(Succeed()) + + By("Updating HostFirmwareSettings with a new value") + hfs := &metal3api.HostFirmwareSettings{} + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)).To(Succeed()) + + helper, err := patch.NewHelper(hfs, clusterProxy.GetClient()) + Expect(err).NotTo(HaveOccurred()) + hfs.Spec.Settings = metal3api.DesiredSettingsMap{ + hfsTestKey2: intstr.FromString(hfsTestNewValue2), + } + Expect(helper.Patch(ctx, hfs)).To(Succeed()) + + By("Triggering a reboot via annotation") + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, &bmh)).To(Succeed()) + AnnotateBmh(ctx, clusterProxy.GetClient(), bmh, metal3api.RebootAnnotationPrefix, ptr.To("")) + + By("Waiting for the BMH to enter servicing") + WaitForBmhInOperationalStatus(ctx, WaitForBmhInOperationalStatusInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.OperationalStatusServicing, + UndesiredStates: []metal3api.OperationalStatus{ + metal3api.OperationalStatusError, + }, + }, e2eConfig.GetIntervals(specName, "wait-servicing")...) + + By("Clearing HostFirmwareSettings spec to abort servicing") + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, hfs)).To(Succeed()) + helper, err = patch.NewHelper(hfs, clusterProxy.GetClient()) + Expect(err).NotTo(HaveOccurred()) + hfs.Spec.Settings = nil + Expect(helper.Patch(ctx, hfs)).To(Succeed()) + + By("Waiting for servicing to abort and the BMH to return to OK") + WaitForBmhInOperationalStatus(ctx, WaitForBmhInOperationalStatusInput{ + Client: clusterProxy.GetClient(), + Bmh: bmh, + State: metal3api.OperationalStatusOK, + UndesiredStates: []metal3api.OperationalStatus{ + metal3api.OperationalStatusError, + metal3api.OperationalStatusServicing, + }, + }, e2eConfig.GetIntervals(specName, "wait-firmware-settings")...) + + By("Verifying the host is not stuck in ServicingError") + Expect(clusterProxy.GetClient().Get(ctx, types.NamespacedName{ + Name: bmhName, + Namespace: namespace.Name, + }, &bmh)).To(Succeed()) + Expect(bmh.Status.ErrorType).To(BeEmpty()) + + By("Deleting the BMH") + Expect(clusterProxy.GetClient().Delete(ctx, &bmh)).To(Succeed()) + + By("Waiting for the BMH to be deleted") + WaitForBmhDeleted(ctx, WaitForBmhDeletedInput{ + Client: clusterProxy.GetClient(), + BmhName: bmhName, + Namespace: namespace.Name, + }, e2eConfig.GetIntervals(specName, "wait-bmh-deleted")...) + }) + + AfterEach(func() { + CollectSerialLogs(bmc.Name, path.Join(artifactFolder, specName)) + DumpResources(ctx, e2eConfig, clusterProxy, path.Join(artifactFolder, specName)) + if !skipCleanup { + isNamespaced := e2eConfig.GetBoolVariable("NAMESPACE_SCOPED") + Cleanup(ctx, clusterProxy, namespace, cancelWatches, isNamespaced, e2eConfig.GetIntervals("default", "wait-namespace-deleted")...) + } + }) +}) From 01d32fcc45981cf6a36e2141628484b2de45a329 Mon Sep 17 00:00:00 2001 From: Jacob Anders Date: Mon, 25 May 2026 22:57:14 +1000 Subject: [PATCH 4/4] Add e2e intervals required by firmware settings tests --- test/e2e/config/ironic.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/e2e/config/ironic.yaml b/test/e2e/config/ironic.yaml index 6d3a966d07..e0401c6b50 100644 --- a/test/e2e/config/ironic.yaml +++ b/test/e2e/config/ironic.yaml @@ -82,6 +82,9 @@ intervals: default/wait-secret-deletion: ["1m", "1s"] default/wait-connect-ssh: ["2m", "10s"] default/wait-externally-provisioned: ["1m", "10ms"] + default/wait-servicing: [ "1m", "100ms" ] + default/wait-firmware-settings: [ "15m", "1s" ] + default/wait-reconcile: [ "1s", "10ms" ] kindExtraPortMappings: # Expose Ironic ports so they are reachable outside of kind