-
Notifications
You must be signed in to change notification settings - Fork 58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
create a scaler controller to watch and trigger the events on CR creation #529
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,309 @@ | ||
/* | ||
Copyright 2021 Red Hat OpenShift Data Foundation. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package controllers | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"slices" | ||
|
||
"github.com/go-logr/logr" | ||
operatorv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this could be shortened to |
||
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" | ||
"k8s.io/apimachinery/pkg/api/meta" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/labels" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
ctrl "sigs.k8s.io/controller-runtime" | ||
"sigs.k8s.io/controller-runtime/pkg/builder" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
"sigs.k8s.io/controller-runtime/pkg/controller" | ||
"sigs.k8s.io/controller-runtime/pkg/event" | ||
"sigs.k8s.io/controller-runtime/pkg/handler" | ||
"sigs.k8s.io/controller-runtime/pkg/log" | ||
"sigs.k8s.io/controller-runtime/pkg/predicate" | ||
"sigs.k8s.io/controller-runtime/pkg/source" | ||
) | ||
|
||
var csvLabelKey = "operators.coreos.com/%s.%s" | ||
|
||
var (
	// Metadata-only representations of the custom resource kinds whose
	// presence or absence drives operator scaling. PartialObjectMetadata
	// allows listing/watching these kinds without registering their full
	// Go types in the scheme.
	kindStorageCluster = &metav1.PartialObjectMetadata{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "ocs.openshift.io/v1",
			Kind:       "StorageCluster",
		},
	}
	kindCephCluster = &metav1.PartialObjectMetadata{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "ceph.rook.io/v1",
			Kind:       "CephCluster",
		},
	}
	kindNoobaa = &metav1.PartialObjectMetadata{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "noobaa.io/v1alpha1",
			Kind:       "NooBaa",
		},
	}
	kindFlashSystemCluster = &metav1.PartialObjectMetadata{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "odf.ibm.com/v1alpha1",
			Kind:       "FlashSystemCluster",
		},
	}

	// kindList collects copies of all watched kinds.
	// NOTE(review): currently unused — presumably kept for planned future
	// use; confirm before removing.
	//nolint:unused
	kindList = []metav1.PartialObjectMetadata{
		*kindStorageCluster,
		*kindCephCluster,
		*kindNoobaa,
		*kindFlashSystemCluster,
	}
)
|
||
// Names of the CRDs whose creation triggers setup of a dynamic watch for
// the corresponding custom resource kind. These are fixed identifiers, so
// declare them as constants.
const (
	crdStorageCluster     = "storageclusters.ocs.openshift.io"
	crdCephCluster        = "cephclusters.ceph.rook.io"
	crdNoobaa             = "noobaas.noobaa.io"
	crdFlashSystemCluster = "flashsystemclusters.odf.ibm.com"
)

// crdList collects all CRD names the scaler controller reacts to.
var crdList = []string{
	crdStorageCluster,
	crdCephCluster,
	crdNoobaa,
	crdFlashSystemCluster,
}
|
||
// Operator package names, used to build the OLM label key (csvLabelKey)
// that selects the CSVs belonging to each operator. These are fixed
// identifiers, so declare them as constants.
// NOTE(review): despite the "csv" prefix these are package names rather
// than CSV names; consider renaming in a follow-up.
const (
	csvCephcsiOperator       = "cephcsi-operator"        //nolint:unused
	csvMcgOperator           = "mcg-operator"
	csvOcsClientOperator     = "ocs-client-operator"     //nolint:unused
	csvOcsOperator           = "ocs-operator"
	csvOdfCsiAddonsOperator  = "odf-csi-addons-operator" //nolint:unused
	csvOdfDependencies       = "odf-dependencies"        //nolint:unused
	csvOdfOperator           = "odf-operator"            //nolint:unused
	csvOdfPrometheusOperator = "odf-prometheus-operator" //nolint:unused
	csvRecipe                = "recipe"                  //nolint:unused
	csvRookCephOperator      = "rook-ceph-operator"

	csvIbmStorageOdfOperator = "ibm-storage-odf-operator"
)
|
||
var (
	// crdToKindMapping maps a CRD name to the metadata-only object of the
	// custom resource kind it defines. Note the values are copies of the
	// kind* variables above, not aliases of them.
	crdToKindMapping = map[string]metav1.PartialObjectMetadata{
		crdStorageCluster:     *kindStorageCluster,
		crdCephCluster:        *kindCephCluster,
		crdNoobaa:             *kindNoobaa,
		crdFlashSystemCluster: *kindFlashSystemCluster,
	}

	// kindToCsvsMapping maps a watched CR kind to the names of the
	// operators whose CSVs should be scaled for it. A slice is used so
	// that more than one operator can be scaled per kind in the future.
	kindToCsvsMapping = map[*metav1.PartialObjectMetadata][]string{
		kindStorageCluster:     []string{csvOcsOperator},
		kindCephCluster:        []string{csvRookCephOperator},
		kindNoobaa:             []string{csvMcgOperator},
		kindFlashSystemCluster: []string{csvIbmStorageOdfOperator},
	}
)
|
||
// ScalerReconciler watches CRDs and their custom resources and scales the
// replicas of the corresponding operator CSVs up or down depending on
// whether any CRs of the watched kinds exist.
type ScalerReconciler struct {
	// ctx and logger are (re)set at the start of every Reconcile call.
	ctx    context.Context
	logger logr.Logger

	Client client.Client

	Scheme            *runtime.Scheme
	OperatorNamespace string
	// controller and mgr are stored by SetupWithManager so that dynamic
	// watches can be added later, at reconcile time.
	controller controller.Controller
	mgr        ctrl.Manager
}
|
||
//+kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch | ||
//+kubebuilder:rbac:groups=operators.coreos.com,resources=clusterserviceversions,verbs=get;list;update | ||
|
||
//+kubebuilder:rbac:groups=ocs.openshift.io,resources=storageclusters,verbs=get;list;watch | ||
//+kubebuilder:rbac:groups=ceph.rook.io,resources=cephclusters,verbs=get;list;watch | ||
//+kubebuilder:rbac:groups=noobaa.io,resources=noobaas,verbs=get;list;watch | ||
//+kubebuilder:rbac:groups=odf.ibm.com,resources=flashsystemclusters,verbs=get;list;watch | ||
|
||
// For more details, check Reconcile and its Result here: | ||
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile | ||
func (r *ScalerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { | ||
r.ctx = ctx | ||
r.logger = log.FromContext(ctx) | ||
|
||
r.logger.Info("starting reconcile") | ||
|
||
err := r.addWatches(req) | ||
if err != nil { | ||
return ctrl.Result{}, err | ||
} | ||
|
||
err = r.scaleOperators() | ||
if err != nil { | ||
return ctrl.Result{}, err | ||
} | ||
|
||
r.logger.Info("ending reconcile") | ||
return ctrl.Result{}, nil | ||
} | ||
|
||
func (r *ScalerReconciler) scaleOperators() error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. possible to follow a consistent flow in laying out functions? First one called is I know this could be a nitpick however maintaining a consistent flow really helps in reducing movement here and there, I suggest SetupWithManager, Reconcile (as these are exported) and rest of the methods in the order we reconcile with file level helpers at the end, only a suggestion, any reasonable layout is fine. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The one i am following now is operator-sdk layout where There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, they are first and last as there is ntg in b/n exported (and kubebuilder prefers many things alphabetically) , so all other methods off of reconciler and helper should come after exported functions then? |
||
|
||
for kind, csvNames := range kindToCsvsMapping { | ||
|
||
var replicas int32 = 1 | ||
objects := &metav1.PartialObjectMetadataList{} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. possible to give a more concrete name here? these are CRs, isn't it? crList? |
||
objects.TypeMeta = kind.TypeMeta | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this work 🤔? isn't List and individual resource distinct? this line here seems like assigning Pod typeMeta to PodList typeMeta. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes it does work |
||
|
||
err := r.Client.List(r.ctx, objects) | ||
if err != nil { | ||
if meta.IsNoMatchError(err) { | ||
continue | ||
} | ||
Comment on lines
+173
to
+176
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes they are different IsNoMatchError is when there is no CRD. |
||
r.logger.Error(err, "failed to list objects") | ||
return err | ||
} | ||
|
||
if len(objects.Items) == 0 { | ||
replicas = 0 | ||
} | ||
Comment on lines
+181
to
+183
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this might need an offline discussion, possible to bring in next meeting? the intention here seems to scale down if no CR for the operator is running and this assumes operator is only responsible for CR but not any auxiliary resources (like a webhook or configmap etc) which might not always be the case? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. correct |
||
|
||
for _, csvName := range csvNames { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe it's better to run this in parallel or let the loop go over all CSVs trying to update CSV as a failure in one stops everything else now, why do want to put a break if all operators can be run in parallel in the cluster? |
||
key := fmt.Sprintf(csvLabelKey, csvName, r.OperatorNamespace) | ||
|
||
csvList := &operatorv1alpha1.ClusterServiceVersionList{} | ||
err = r.Client.List( | ||
r.ctx, csvList, | ||
&client.ListOptions{ | ||
LabelSelector: labels.SelectorFromSet(map[string]string{key: ""}), | ||
Namespace: r.OperatorNamespace, | ||
}, | ||
Comment on lines
+191
to
+194
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we have helper methods to fill out the ListOptions like |
||
) | ||
if err != nil { | ||
r.logger.Error(err, "failed to list csvs of label", "label", key) | ||
return err | ||
} | ||
|
||
for _, csvObj := range csvList.Items { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this creates more work for gc if it escapes to heap as in recent versions Go is making sure that every iteration gets a new instance of the variable through out the loops lifetime and I suggest to take a index for this for idx := range csvList.Items {
csv := &csvList.Items[idx] |
||
var isUpdateRequired bool | ||
|
||
for i := range csvObj.Spec.InstallStrategy.StrategySpec.DeploymentSpecs { | ||
if replicas != *csvObj.Spec.InstallStrategy.StrategySpec.DeploymentSpecs[i].Spec.Replicas { | ||
csvObj.Spec.InstallStrategy.StrategySpec.DeploymentSpecs[i].Spec.Replicas = &replicas | ||
isUpdateRequired = true | ||
} | ||
} | ||
|
||
if isUpdateRequired { | ||
err = r.Client.Update(r.ctx, &csvObj) | ||
if err != nil { | ||
r.logger.Error(err, "failed to update csv replica", "replicas", replicas) | ||
return err | ||
} | ||
r.logger.Info("csv updated successfully", "csvName", csvObj.Name, "replicas", replicas) | ||
} | ||
} | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (r *ScalerReconciler) addWatches(req ctrl.Request) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
if !slices.Contains(crdList, req.Name) { | ||
return nil | ||
} | ||
|
||
r.logger.Info("adding dynamic watch", "object", req.Name) | ||
|
||
kind := crdToKindMapping[req.Name] | ||
err := r.addDynamicWatch(&kind) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is an implicit assumption here that we don't get event on same CRD which is incorrect as runtime requeues' all events based on sync time (usually ~10h iirc), you need to guard against re-adding the watch again as an observation was already made that it increases reconcile events. |
||
if err != nil { | ||
r.logger.Error(err, "failed to add dynamic watch", "object", req.Name) | ||
return err | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (r *ScalerReconciler) addDynamicWatch(kind *metav1.PartialObjectMetadata) error { | ||
return r.controller.Watch( | ||
source.Kind( | ||
r.mgr.GetCache(), | ||
client.Object(kind), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can change param type to client.Object as ParitalObj already satisfies this during invocation it is cast by default, explicitness here isn't helping anything as at the end we are casting before usage. |
||
&handler.EnqueueRequestForObject{}, | ||
predicate.Funcs{ | ||
// Trigger the reconcile for both creation and deletion events of the object. | ||
// This ensures the replicas in the CSV are updated based on the presence or absence of the Custom Resource (CR). | ||
CreateFunc: func(e event.CreateEvent) bool { | ||
return true | ||
}, | ||
DeleteFunc: func(e event.DeleteEvent) bool { | ||
return true | ||
}, | ||
UpdateFunc: func(e event.UpdateEvent) bool { | ||
return false | ||
}, | ||
GenericFunc: func(e event.GenericEvent) bool { | ||
return false | ||
}, | ||
}, | ||
predicate.GenerationChangedPredicate{}, | ||
), | ||
) | ||
} | ||
|
||
// SetupWithManager sets up the controller with the Manager. | ||
func (r *ScalerReconciler) SetupWithManager(mgr ctrl.Manager) error { | ||
|
||
controller, err := ctrl.NewControllerManagedBy(mgr). | ||
Named("scaler"). | ||
WatchesMetadata( | ||
&extv1.CustomResourceDefinition{}, | ||
&handler.EnqueueRequestForObject{}, | ||
builder.WithPredicates( | ||
predicate.NewPredicateFuncs(func(obj client.Object) bool { | ||
return slices.Contains(crdList, obj.GetName()) | ||
}), | ||
predicate.Funcs{ | ||
// Trigger a reconcile only during the creation of a specific CRD to ensure it runs exactly once for that CRD. | ||
// This is required to dynamically add a watch for the corresponding Custom Resource (CR) based on the CRD name. | ||
// The reconcile will be triggered with the CRD name as `req.Name`, and the reconciler will set up a watch for the CR using the CRD name. | ||
CreateFunc: func(e event.CreateEvent) bool { | ||
return true | ||
}, | ||
DeleteFunc: func(e event.DeleteEvent) bool { | ||
return false | ||
}, | ||
UpdateFunc: func(e event.UpdateEvent) bool { | ||
return false | ||
}, | ||
GenericFunc: func(e event.GenericEvent) bool { | ||
return false | ||
}, | ||
}, | ||
predicate.GenerationChangedPredicate{}, | ||
), | ||
). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i'm just thinking, do we want to add a watch for CSV as we are effectively managing it's replicas? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. currently i dont want to because if we do that we don't leave any room for user to make the change to the replicas of the CSV also they wont be able to scale down the odf-operator because of the webhook. |
||
Build(r) | ||
|
||
r.controller = controller | ||
r.mgr = mgr | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not store only what's necessary? here we require only manager cache, isn't it? |
||
|
||
return err | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pls change year in the copyright.