github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/apps/cluster_plan_builder.go (about) 1 /* 2 Copyright (C) 2022-2023 ApeCloud Co., Ltd 3 4 This file is part of KubeBlocks project 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU Affero General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU Affero General Public License for more details. 15 16 You should have received a copy of the GNU Affero General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 package apps 21 22 import ( 23 "context" 24 "fmt" 25 "reflect" 26 27 "github.com/go-logr/logr" 28 snapshotv1beta1 "github.com/kubernetes-csi/external-snapshotter/client/v3/apis/volumesnapshot/v1beta1" 29 snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1" 30 "github.com/pkg/errors" 31 appsv1 "k8s.io/api/apps/v1" 32 corev1 "k8s.io/api/core/v1" 33 apierrors "k8s.io/apimachinery/pkg/api/errors" 34 "k8s.io/apimachinery/pkg/api/meta" 35 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 "k8s.io/client-go/tools/record" 37 ctrl "sigs.k8s.io/controller-runtime" 38 "sigs.k8s.io/controller-runtime/pkg/client" 39 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 40 41 appsv1alpha1 "github.com/1aal/kubeblocks/apis/apps/v1alpha1" 42 dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1" 43 extensionsv1alpha1 "github.com/1aal/kubeblocks/apis/extensions/v1alpha1" 44 storagev1alpha1 "github.com/1aal/kubeblocks/apis/storage/v1alpha1" 45 workloadsv1alpha1 "github.com/1aal/kubeblocks/apis/workloads/v1alpha1" 46 "github.com/1aal/kubeblocks/pkg/constant" 47 roclient "github.com/1aal/kubeblocks/pkg/controller/client" 48 "github.com/1aal/kubeblocks/pkg/controller/graph" 49 "github.com/1aal/kubeblocks/pkg/controller/model" 50 intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil" 51 ) 52 53 const ( 54 defaultWeight int = iota 55 workloadWeight 56 clusterWeight 57 ) 58 59 // clusterTransformContext a graph.TransformContext implementation for Cluster reconciliation 60 type clusterTransformContext struct { 61 context.Context 62 Client roclient.ReadonlyClient 63 record.EventRecorder 64 logr.Logger 65 Cluster *appsv1alpha1.Cluster 66 OrigCluster *appsv1alpha1.Cluster 67 ClusterDef *appsv1alpha1.ClusterDefinition 68 ClusterVer *appsv1alpha1.ClusterVersion 69 } 70 71 // clusterPlanBuilder a graph.PlanBuilder implementation for Cluster reconciliation 72 type clusterPlanBuilder struct { 73 req ctrl.Request 74 cli client.Client 75 transCtx *clusterTransformContext 76 transformers graph.TransformerChain 77 } 78 79 // clusterPlan a graph.Plan implementation for Cluster reconciliation 80 type clusterPlan struct { 81 dag *graph.DAG 82 walkFunc graph.WalkFunc 83 cli client.Client 84 transCtx *clusterTransformContext 85 } 86 87 var _ graph.TransformContext = &clusterTransformContext{} 88 var _ graph.PlanBuilder = &clusterPlanBuilder{} 89 var _ graph.Plan = &clusterPlan{} 90 91 // TransformContext implementation 92 93 func (c *clusterTransformContext) GetContext() context.Context { 94 return c.Context 95 } 96 97 func (c *clusterTransformContext) GetClient() roclient.ReadonlyClient { 98 return c.Client 99 } 100 101 func (c *clusterTransformContext) GetRecorder() record.EventRecorder { 102 return c.EventRecorder 103 } 104 105 func (c *clusterTransformContext) GetLogger() logr.Logger { 106 return c.Logger 107 } 108 109 func init() { 110 model.AddScheme(appsv1alpha1.AddToScheme) 111 model.AddScheme(dpv1alpha1.AddToScheme) 112 model.AddScheme(snapshotv1.AddToScheme) 113 model.AddScheme(snapshotv1beta1.AddToScheme) 114 model.AddScheme(extensionsv1alpha1.AddToScheme) 115 model.AddScheme(workloadsv1alpha1.AddToScheme) 116 model.AddScheme(storagev1alpha1.AddToScheme) 117 } 118 119 // PlanBuilder implementation 120 121 func (c *clusterPlanBuilder) Init() error { 122 cluster := &appsv1alpha1.Cluster{} 123 if err := c.cli.Get(c.transCtx.Context, c.req.NamespacedName, cluster); err != nil { 124 return err 125 } 126 c.AddTransformer(&initTransformer{cluster: cluster}) 127 return nil 128 } 129 130 func (c *clusterPlanBuilder) AddTransformer(transformer ...graph.Transformer) graph.PlanBuilder { 131 c.transformers = append(c.transformers, transformer...) 132 return c 133 } 134 135 func (c *clusterPlanBuilder) AddParallelTransformer(transformer ...graph.Transformer) graph.PlanBuilder { 136 c.transformers = append(c.transformers, &ParallelTransformers{transformers: transformer}) 137 return c 138 } 139 140 // Build runs all transformers to generate a plan 141 func (c *clusterPlanBuilder) Build() (graph.Plan, error) { 142 var err error 143 defer func() { 144 // set apply resource condition 145 // if cluster is being deleted, no need to set apply resource condition 146 if c.transCtx.Cluster.IsDeleting() { 147 return 148 } 149 preCheckCondition := meta.FindStatusCondition(c.transCtx.Cluster.Status.Conditions, appsv1alpha1.ConditionTypeProvisioningStarted) 150 if preCheckCondition == nil { 151 // this should not happen 152 return 153 } 154 // if pre-check failed, this is a fast return, no need to set apply resource condition 155 if preCheckCondition.Status != metav1.ConditionTrue { 156 sendWarningEventWithError(c.transCtx.GetRecorder(), c.transCtx.Cluster, ReasonPreCheckFailed, err) 157 return 158 } 159 setApplyResourceCondition(&c.transCtx.Cluster.Status.Conditions, c.transCtx.Cluster.Generation, err) 160 sendWarningEventWithError(c.transCtx.GetRecorder(), c.transCtx.Cluster, ReasonApplyResourcesFailed, err) 161 }() 162 163 // new a DAG and apply chain on it 164 dag := graph.NewDAG() 165 err = c.transformers.ApplyTo(c.transCtx, dag) 166 c.transCtx.Logger.V(1).Info(fmt.Sprintf("DAG: %s", dag)) 167 168 // construct execution plan 169 plan := &clusterPlan{ 170 dag: dag, 171 walkFunc: c.defaultWalkFuncWithLogging, 172 cli: c.cli, 173 transCtx: c.transCtx, 174 } 175 return plan, err 176 } 177 178 // Plan implementation 179 180 func (p *clusterPlan) Execute() error { 181 less := func(v1, v2 graph.Vertex) bool { 182 getWeight := func(v graph.Vertex) int { 183 lifecycleVertex, ok := v.(*model.ObjectVertex) 184 if !ok { 185 return defaultWeight 186 } 187 switch lifecycleVertex.Obj.(type) { 188 case *appsv1alpha1.Cluster: 189 return clusterWeight 190 case *appsv1.StatefulSet, *appsv1.Deployment: 191 return workloadWeight 192 default: 193 return defaultWeight 194 } 195 } 196 return getWeight(v1) <= getWeight(v2) 197 } 198 err := p.dag.WalkReverseTopoOrder(p.walkFunc, less) 199 if err != nil { 200 if hErr := p.handlePlanExecutionError(err); hErr != nil { 201 return hErr 202 } 203 } 204 return err 205 } 206 207 func (p *clusterPlan) handlePlanExecutionError(err error) error { 208 clusterCopy := p.transCtx.OrigCluster.DeepCopy() 209 condition := newFailedApplyResourcesCondition(err) 210 meta.SetStatusCondition(&clusterCopy.Status.Conditions, condition) 211 sendWarningEventWithError(p.transCtx.GetRecorder(), clusterCopy, ReasonApplyResourcesFailed, err) 212 return p.cli.Status().Patch(p.transCtx.Context, clusterCopy, client.MergeFrom(p.transCtx.OrigCluster)) 213 } 214 215 // Do the real works 216 217 // NewClusterPlanBuilder returns a clusterPlanBuilder powered PlanBuilder 218 func NewClusterPlanBuilder(ctx intctrlutil.RequestCtx, cli client.Client, req ctrl.Request) graph.PlanBuilder { 219 return &clusterPlanBuilder{ 220 req: req, 221 cli: cli, 222 transCtx: &clusterTransformContext{ 223 Context: ctx.Ctx, 224 Client: model.NewGraphClient(cli), 225 EventRecorder: ctx.Recorder, 226 Logger: ctx.Log, 227 }, 228 } 229 } 230 231 func (c *clusterPlanBuilder) defaultWalkFuncWithLogging(vertex graph.Vertex) error { 232 node, ok := vertex.(*model.ObjectVertex) 233 err := c.defaultWalkFunc(vertex) 234 switch { 235 case err == nil: 236 return err 237 case !ok: 238 c.transCtx.Logger.Error(err, "") 239 case node.Action == nil: 240 c.transCtx.Logger.Error(err, fmt.Sprintf("%T", node)) 241 case apierrors.IsConflict(err): 242 return err 243 default: 244 c.transCtx.Logger.Error(err, fmt.Sprintf("%s %T error", *node.Action, node.Obj)) 245 } 246 return err 247 } 248 249 func (c *clusterPlanBuilder) defaultWalkFunc(vertex graph.Vertex) error { 250 node, ok := vertex.(*model.ObjectVertex) 251 if !ok { 252 return fmt.Errorf("wrong vertex type %v", vertex) 253 } 254 if node.Action == nil { 255 return errors.New("node action can't be nil") 256 } 257 258 // cluster object has more business to do, handle them here 259 if _, ok = node.Obj.(*appsv1alpha1.Cluster); ok { 260 if err := c.reconcileCluster(node); err != nil { 261 return err 262 } 263 } 264 return c.reconcileObject(node) 265 } 266 267 func (c *clusterPlanBuilder) reconcileObject(node *model.ObjectVertex) error { 268 switch *node.Action { 269 case model.CREATE: 270 err := c.cli.Create(c.transCtx.Context, node.Obj) 271 if err != nil && !apierrors.IsAlreadyExists(err) { 272 return err 273 } 274 case model.UPDATE: 275 err := c.cli.Update(c.transCtx.Context, node.Obj) 276 if err != nil && !apierrors.IsNotFound(err) { 277 return err 278 } 279 case model.PATCH: 280 patch := client.MergeFrom(node.OriObj) 281 if err := c.cli.Patch(c.transCtx.Context, node.Obj, patch); err != nil && !apierrors.IsNotFound(err) { 282 return err 283 } 284 case model.DELETE: 285 if controllerutil.RemoveFinalizer(node.Obj, constant.DBClusterFinalizerName) { 286 err := c.cli.Update(c.transCtx.Context, node.Obj) 287 if err != nil && !apierrors.IsNotFound(err) { 288 return err 289 } 290 } 291 // delete secondary objects 292 if _, ok := node.Obj.(*appsv1alpha1.Cluster); !ok { 293 err := intctrlutil.BackgroundDeleteObject(c.cli, c.transCtx.Context, node.Obj) 294 if err != nil && !apierrors.IsNotFound(err) { 295 return err 296 } 297 } 298 case model.STATUS: 299 patch := client.MergeFrom(node.OriObj) 300 if err := c.cli.Status().Patch(c.transCtx.Context, node.Obj, patch); err != nil { 301 return err 302 } 303 // handle condition and phase changing triggered events 304 if newCluster, ok := node.Obj.(*appsv1alpha1.Cluster); ok { 305 oldCluster, _ := node.OriObj.(*appsv1alpha1.Cluster) 306 c.emitConditionUpdatingEvent(oldCluster.Status.Conditions, newCluster.Status.Conditions) 307 c.emitStatusUpdatingEvent(oldCluster.Status, newCluster.Status) 308 } 309 case model.NOOP: 310 // nothing 311 } 312 return nil 313 } 314 315 func (c *clusterPlanBuilder) reconcileCluster(node *model.ObjectVertex) error { 316 cluster := node.Obj.(*appsv1alpha1.Cluster).DeepCopy() 317 origCluster := node.OriObj.(*appsv1alpha1.Cluster) 318 switch *node.Action { 319 // cluster.meta and cluster.spec might change 320 case model.STATUS: 321 if !reflect.DeepEqual(cluster.ObjectMeta, origCluster.ObjectMeta) || !reflect.DeepEqual(cluster.Spec, origCluster.Spec) { 322 patch := client.MergeFrom(origCluster.DeepCopy()) 323 if err := c.cli.Patch(c.transCtx.Context, cluster, patch); err != nil { 324 return err 325 } 326 } 327 case model.CREATE, model.UPDATE: 328 return fmt.Errorf("cluster can't be created or updated: %s", cluster.Name) 329 } 330 return nil 331 } 332 333 func (c *clusterPlanBuilder) emitConditionUpdatingEvent(oldConditions, newConditions []metav1.Condition) { 334 for _, newCondition := range newConditions { 335 oldCondition := meta.FindStatusCondition(oldConditions, newCondition.Type) 336 // filtered in cluster creation 337 if oldCondition == nil && newCondition.Status == metav1.ConditionFalse { 338 return 339 } 340 if !reflect.DeepEqual(oldCondition, &newCondition) { 341 eType := corev1.EventTypeNormal 342 if newCondition.Status == metav1.ConditionFalse { 343 eType = corev1.EventTypeWarning 344 } 345 c.transCtx.EventRecorder.Event(c.transCtx.Cluster, eType, newCondition.Reason, newCondition.Message) 346 } 347 } 348 } 349 350 func (c *clusterPlanBuilder) emitStatusUpdatingEvent(oldStatus, newStatus appsv1alpha1.ClusterStatus) { 351 cluster := c.transCtx.Cluster 352 newPhase := newStatus.Phase 353 if newPhase == oldStatus.Phase { 354 return 355 } 356 eType := corev1.EventTypeNormal 357 message := "" 358 switch newPhase { 359 case appsv1alpha1.RunningClusterPhase: 360 message = fmt.Sprintf("Cluster: %s is ready, current phase is %s", cluster.Name, newPhase) 361 case appsv1alpha1.StoppedClusterPhase: 362 message = fmt.Sprintf("Cluster: %s stopped successfully.", cluster.Name) 363 case appsv1alpha1.FailedClusterPhase, appsv1alpha1.AbnormalClusterPhase: 364 message = fmt.Sprintf("Cluster: %s is %s, check according to the components message", cluster.Name, newPhase) 365 eType = corev1.EventTypeWarning 366 } 367 if len(message) > 0 { 368 c.transCtx.EventRecorder.Event(cluster, eType, string(newPhase), message) 369 } 370 }