Improve events when max total nodes of the cluster is reached. #7667

Open · wants to merge 1 commit into master
11 changes: 11 additions & 0 deletions cluster-autoscaler/core/static_autoscaler.go
@@ -528,7 +528,18 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
scaleUpStatus.Result = status.ScaleUpLimitedByMaxNodesTotal
klog.Warningf("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes))
autoscalingContext.LogRecorder.Eventf(apiv1.EventTypeWarning, "MaxNodesTotalReached",

Member:
I think instead of directly generating the event here, we should make this information a part of the scale up status, so that EventingScaleUpProcessor (and possibly other status processors) can be aware of this condition.


Contributor Author:
Do you think the cluster-wide event would make sense in the EventingScaleUpProcessor as well, or only the per-pod events? Please let me know what you think about the cluster-wide event - I would personally keep it together with the log statement, but I'm happy to move it if you have a different opinion.

It is also a bit odd that we are not running scale-up at all and yet still run EventingScaleUpProcessor, and I am afraid that someone will change that at some point and the events won't be generated any more. For now, though, this processor does indeed seem to be the better place, at least for the per-pod events.


Member:
Not triggering any scale-up for a pod because CA couldn't find a way to help is something we do want communicated externally, so I wouldn't worry about the no-scale-up events not getting generated - that would just be a bug.

I'm OK with the cluster-wide event, though I expect users to be more concerned about the pods they created, and hence to be watching the events generated for those pods.

"Max total nodes in cluster reached: %v", autoscalingContext.MaxNodesTotal)
shouldScaleUp = false

noScaleUpInfoForPods := []status.NoScaleUpInfo{}
for _, pod := range unschedulablePodsToHelp {
noScaleUpInfo := status.NoScaleUpInfo{
Pod: pod,
}
noScaleUpInfoForPods = append(noScaleUpInfoForPods, noScaleUpInfo)
}
scaleUpStatus.PodsRemainUnschedulable = noScaleUpInfoForPods
} else if len(a.BypassedSchedulers) == 0 && allPodsAreNew(unschedulablePodsToHelp, currentTime) {
// The assumption here is that these pods have been created very recently and probably there
// is more pods to come. In theory we could check the newest pod time but then if pod were created
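For readers following the thread above: a minimal, self-contained sketch of the status-driven flow the reviewers are discussing, in which RunOnce only records the condition on the scale-up status and a status processor later emits the per-pod events. The types and function names below are simplified stand-ins, not the cluster-autoscaler's real structs.

// Minimal sketch of the status-driven flow discussed above: RunOnce only marks
// the ScaleUpStatus when the node limit is hit, and a status processor turns
// that into per-pod events. Types are simplified stand-ins, not the real
// cluster-autoscaler structs.
package main

import "fmt"

type ScaleUpResult int

const (
	ScaleUpSuccessful ScaleUpResult = iota
	ScaleUpLimitedByMaxNodesTotal
)

type Pod struct{ Name string }

type NoScaleUpInfo struct{ Pod *Pod }

type ScaleUpStatus struct {
	Result                  ScaleUpResult
	PodsRemainUnschedulable []NoScaleUpInfo
}

// markLimitedByMaxNodes mirrors what RunOnce does in the diff above: it records
// the result and the pods that stay pending instead of emitting per-pod events directly.
func markLimitedByMaxNodes(status *ScaleUpStatus, pending []*Pod) {
	status.Result = ScaleUpLimitedByMaxNodesTotal
	for _, pod := range pending {
		status.PodsRemainUnschedulable = append(status.PodsRemainUnschedulable, NoScaleUpInfo{Pod: pod})
	}
}

// processStatus mirrors the role of EventingScaleUpStatusProcessor: it derives
// the per-pod event message from the status, so the "max total nodes" reason is
// reported on every pod that could not be helped.
func processStatus(status *ScaleUpStatus, recordEvent func(pod *Pod, msg string)) {
	for _, info := range status.PodsRemainUnschedulable {
		reason := "no scale-up option available" // simplified placeholder for the aggregated reasons
		if status.Result == ScaleUpLimitedByMaxNodesTotal {
			reason = "max total nodes in cluster reached"
		}
		recordEvent(info.Pod, fmt.Sprintf("pod didn't trigger scale-up: %s", reason))
	}
}

func main() {
	pending := []*Pod{{Name: "web-0"}, {Name: "web-1"}}
	status := &ScaleUpStatus{}
	markLimitedByMaxNodes(status, pending)
	processStatus(status, func(pod *Pod, msg string) {
		fmt.Printf("%s: %s\n", pod.Name, msg)
	})
}

In the actual change, the cluster-wide MaxNodesTotalReached warning stays next to the log statement in RunOnce, while the per-pod NotTriggerScaleUp events come from EventingScaleUpStatusProcessor via the ScaleUpStatus, as shown in the diff below.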
@@ -41,7 +41,7 @@ func (p *EventingScaleUpStatusProcessor) Process(context *context.AutoscalingCon
for _, noScaleUpInfo := range status.PodsRemainUnschedulable {
context.Recorder.Event(noScaleUpInfo.Pod, apiv1.EventTypeNormal, "NotTriggerScaleUp",
fmt.Sprintf("pod didn't trigger scale-up: %s",
-				ReasonsMessage(noScaleUpInfo, consideredNodeGroupsMap)))
+				ReasonsMessage(status.Result, noScaleUpInfo, consideredNodeGroupsMap)))
}
} else {
klog.V(4).Infof("Skipping event processing for unschedulable pods since there is a" +
@@ -60,7 +60,11 @@ func (p *EventingScaleUpStatusProcessor) CleanUp() {
}

// ReasonsMessage aggregates reasons from NoScaleUpInfos.
- func ReasonsMessage(noScaleUpInfo NoScaleUpInfo, consideredNodeGroups map[string]cloudprovider.NodeGroup) string {
+ func ReasonsMessage(scaleUpStatus ScaleUpResult, noScaleUpInfo NoScaleUpInfo, consideredNodeGroups map[string]cloudprovider.NodeGroup) string {
if scaleUpStatus == ScaleUpLimitedByMaxNodesTotal {
return "max total nodes in cluster reached"
}

messages := []string{}
aggregated := map[string]int{}
for nodeGroupId, reasons := range noScaleUpInfo.RejectedNodeGroups {
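As a rough illustration of the two paths through the updated ReasonsMessage, here is a simplified, self-contained stand-in with hypothetical inputs; it mirrors the shape of the change above (early return for the max-nodes case, per-node-group aggregation otherwise) rather than the package's actual types.

// Simplified stand-in for the two paths through ReasonsMessage after this
// change. Types, inputs, and the join separator are illustrative only.
package main

import (
	"fmt"
	"strings"
)

type ScaleUpResult int

const (
	ScaleUpNoOptionsAvailable ScaleUpResult = iota
	ScaleUpLimitedByMaxNodesTotal
)

// reasonsMessage mimics the updated function's shape: the scale-up result is
// checked first, and only the fallback path aggregates per-node-group reasons.
func reasonsMessage(result ScaleUpResult, rejectedReasons map[string][]string) string {
	if result == ScaleUpLimitedByMaxNodesTotal {
		return "max total nodes in cluster reached"
	}
	counts := map[string]int{}
	for _, reasons := range rejectedReasons {
		for _, r := range reasons {
			counts[r]++
		}
	}
	parts := []string{}
	for reason, n := range counts {
		parts = append(parts, fmt.Sprintf("%d %s", n, reason))
	}
	return strings.Join(parts, ", ")
}

func main() {
	rejected := map[string][]string{
		"ng-1": {"max limit reached"},
		"ng-2": {"max limit reached", "not ready"},
	}
	fmt.Println(reasonsMessage(ScaleUpNoOptionsAvailable, rejected))
	// e.g. "2 max limit reached, 1 not ready" (map iteration order may vary)
	fmt.Println(reasonsMessage(ScaleUpLimitedByMaxNodesTotal, nil))
	// "max total nodes in cluster reached"
}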
@@ -101,6 +101,21 @@ func TestEventingScaleUpStatusProcessor(t *testing.T) {
expectedTriggered: 0,
expectedNoTriggered: 0,
},
{
caseName: "No scale up; max total nodes in cluster reached",
state: &ScaleUpStatus{
Result: ScaleUpLimitedByMaxNodesTotal,
ScaleUpInfos: []nodegroupset.ScaleUpInfo{{}},
PodsTriggeredScaleUp: []*apiv1.Pod{},
PodsRemainUnschedulable: []NoScaleUpInfo{
{Pod: p1},
{Pod: p2},
{Pod: p3},
},
},
expectedTriggered: 0,
expectedNoTriggered: 3,
},
}

for _, tc := range testCases {
@@ -166,9 +181,18 @@ func TestReasonsMessage(t *testing.T) {
"2 max limit reached",
"1 not ready",
}
-	result := ReasonsMessage(NoScaleUpInfo{nil, rejected, skipped}, considered)
+	result := ReasonsMessage(ScaleUpNoOptionsAvailable, NoScaleUpInfo{nil, rejected, skipped}, considered)

for _, part := range expected {
assert.Contains(t, result, part)
}
}

func TestReasonsMessageWhenScaleUpLimitedByMaxNodesTotal(t *testing.T) {
considered := map[string]cloudprovider.NodeGroup{}
noScaleUpInfo := NoScaleUpInfo{
Pod: nil,
}
result := ReasonsMessage(ScaleUpLimitedByMaxNodesTotal, noScaleUpInfo, considered)
assert.Contains(t, result, "max total nodes in cluster reached")
}