github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/apps/cluster_plan_builder.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package apps
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"reflect"
    26  
    27  	"github.com/go-logr/logr"
    28  	snapshotv1beta1 "github.com/kubernetes-csi/external-snapshotter/client/v3/apis/volumesnapshot/v1beta1"
    29  	snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1"
    30  	"github.com/pkg/errors"
    31  	appsv1 "k8s.io/api/apps/v1"
    32  	corev1 "k8s.io/api/core/v1"
    33  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    34  	"k8s.io/apimachinery/pkg/api/meta"
    35  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    36  	"k8s.io/client-go/tools/record"
    37  	ctrl "sigs.k8s.io/controller-runtime"
    38  	"sigs.k8s.io/controller-runtime/pkg/client"
    39  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    40  
    41  	appsv1alpha1 "github.com/1aal/kubeblocks/apis/apps/v1alpha1"
    42  	dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1"
    43  	extensionsv1alpha1 "github.com/1aal/kubeblocks/apis/extensions/v1alpha1"
    44  	storagev1alpha1 "github.com/1aal/kubeblocks/apis/storage/v1alpha1"
    45  	workloadsv1alpha1 "github.com/1aal/kubeblocks/apis/workloads/v1alpha1"
    46  	"github.com/1aal/kubeblocks/pkg/constant"
    47  	roclient "github.com/1aal/kubeblocks/pkg/controller/client"
    48  	"github.com/1aal/kubeblocks/pkg/controller/graph"
    49  	"github.com/1aal/kubeblocks/pkg/controller/model"
    50  	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
    51  )
    52  
    53  const (
    54  	defaultWeight int = iota
    55  	workloadWeight
    56  	clusterWeight
    57  )
    58  
// clusterTransformContext is the graph.TransformContext implementation used
// for Cluster reconciliation.
//
// The embedded Context, EventRecorder and Logger, together with Client, are
// what the Get* accessor methods of this type return. Cluster is the object
// whose conditions are inspected and updated by clusterPlanBuilder.Build and
// that is used as the target of emitted events; OrigCluster is used as the
// merge-patch base when a plan execution error is recorded. ClusterDef and
// ClusterVer are not read in this file.
// NOTE(review): Cluster, OrigCluster, ClusterDef and ClusterVer are not set by
// NewClusterPlanBuilder; presumably a transformer populates them — confirm.
type clusterTransformContext struct {
	context.Context
	Client roclient.ReadonlyClient
	record.EventRecorder
	logr.Logger
	Cluster     *appsv1alpha1.Cluster
	OrigCluster *appsv1alpha1.Cluster
	ClusterDef  *appsv1alpha1.ClusterDefinition
	ClusterVer  *appsv1alpha1.ClusterVersion
}
    70  
// clusterPlanBuilder is the graph.PlanBuilder implementation used for Cluster
// reconciliation.
//
// req identifies the Cluster to load in Init, cli is the read-write client
// used to fetch the Cluster and to apply the objects of the plan, transCtx is
// the context handed to every transformer, and transformers is the chain that
// Build applies to a fresh DAG.
type clusterPlanBuilder struct {
	req          ctrl.Request
	cli          client.Client
	transCtx     *clusterTransformContext
	transformers graph.TransformerChain
}
    78  
// clusterPlan is the graph.Plan implementation used for Cluster
// reconciliation.
//
// dag holds the vertices to walk, walkFunc is invoked for each vertex during
// Execute, cli is used to patch the Cluster status when execution fails, and
// transCtx supplies the context, recorder and original Cluster for that patch.
type clusterPlan struct {
	dag      *graph.DAG
	walkFunc graph.WalkFunc
	cli      client.Client
	transCtx *clusterTransformContext
}
    86  
    87  var _ graph.TransformContext = &clusterTransformContext{}
    88  var _ graph.PlanBuilder = &clusterPlanBuilder{}
    89  var _ graph.Plan = &clusterPlan{}
    90  
    91  // TransformContext implementation
    92  
// GetContext returns the context.Context embedded in the transform context.
func (c *clusterTransformContext) GetContext() context.Context {
	return c.Context
}
    96  
// GetClient returns the read-only client stored in the transform context.
func (c *clusterTransformContext) GetClient() roclient.ReadonlyClient {
	return c.Client
}
   100  
// GetRecorder returns the event recorder embedded in the transform context.
func (c *clusterTransformContext) GetRecorder() record.EventRecorder {
	return c.EventRecorder
}
   104  
// GetLogger returns the logger embedded in the transform context.
func (c *clusterTransformContext) GetLogger() logr.Logger {
	return c.Logger
}
   108  
// init hands the AddToScheme function of every API group this package works
// with (apps, dataprotection, both volume snapshot versions, extensions,
// workloads and storage) to model.AddScheme.
func init() {
	model.AddScheme(appsv1alpha1.AddToScheme)
	model.AddScheme(dpv1alpha1.AddToScheme)
	model.AddScheme(snapshotv1.AddToScheme)
	model.AddScheme(snapshotv1beta1.AddToScheme)
	model.AddScheme(extensionsv1alpha1.AddToScheme)
	model.AddScheme(workloadsv1alpha1.AddToScheme)
	model.AddScheme(storagev1alpha1.AddToScheme)
}
   118  
   119  // PlanBuilder implementation
   120  
   121  func (c *clusterPlanBuilder) Init() error {
   122  	cluster := &appsv1alpha1.Cluster{}
   123  	if err := c.cli.Get(c.transCtx.Context, c.req.NamespacedName, cluster); err != nil {
   124  		return err
   125  	}
   126  	c.AddTransformer(&initTransformer{cluster: cluster})
   127  	return nil
   128  }
   129  
// AddTransformer appends the given transformers, in order, to the end of the
// chain and returns the builder so calls can be chained.
func (c *clusterPlanBuilder) AddTransformer(transformer ...graph.Transformer) graph.PlanBuilder {
	c.transformers = append(c.transformers, transformer...)
	return c
}
   134  
   135  func (c *clusterPlanBuilder) AddParallelTransformer(transformer ...graph.Transformer) graph.PlanBuilder {
   136  	c.transformers = append(c.transformers, &ParallelTransformers{transformers: transformer})
   137  	return c
   138  }
   139  
   140  // Build runs all transformers to generate a plan
   141  func (c *clusterPlanBuilder) Build() (graph.Plan, error) {
   142  	var err error
   143  	defer func() {
   144  		// set apply resource condition
   145  		// if cluster is being deleted, no need to set apply resource condition
   146  		if c.transCtx.Cluster.IsDeleting() {
   147  			return
   148  		}
   149  		preCheckCondition := meta.FindStatusCondition(c.transCtx.Cluster.Status.Conditions, appsv1alpha1.ConditionTypeProvisioningStarted)
   150  		if preCheckCondition == nil {
   151  			// this should not happen
   152  			return
   153  		}
   154  		// if pre-check failed, this is a fast return, no need to set apply resource condition
   155  		if preCheckCondition.Status != metav1.ConditionTrue {
   156  			sendWarningEventWithError(c.transCtx.GetRecorder(), c.transCtx.Cluster, ReasonPreCheckFailed, err)
   157  			return
   158  		}
   159  		setApplyResourceCondition(&c.transCtx.Cluster.Status.Conditions, c.transCtx.Cluster.Generation, err)
   160  		sendWarningEventWithError(c.transCtx.GetRecorder(), c.transCtx.Cluster, ReasonApplyResourcesFailed, err)
   161  	}()
   162  
   163  	// new a DAG and apply chain on it
   164  	dag := graph.NewDAG()
   165  	err = c.transformers.ApplyTo(c.transCtx, dag)
   166  	c.transCtx.Logger.V(1).Info(fmt.Sprintf("DAG: %s", dag))
   167  
   168  	// construct execution plan
   169  	plan := &clusterPlan{
   170  		dag:      dag,
   171  		walkFunc: c.defaultWalkFuncWithLogging,
   172  		cli:      c.cli,
   173  		transCtx: c.transCtx,
   174  	}
   175  	return plan, err
   176  }
   177  
   178  // Plan implementation
   179  
   180  func (p *clusterPlan) Execute() error {
   181  	less := func(v1, v2 graph.Vertex) bool {
   182  		getWeight := func(v graph.Vertex) int {
   183  			lifecycleVertex, ok := v.(*model.ObjectVertex)
   184  			if !ok {
   185  				return defaultWeight
   186  			}
   187  			switch lifecycleVertex.Obj.(type) {
   188  			case *appsv1alpha1.Cluster:
   189  				return clusterWeight
   190  			case *appsv1.StatefulSet, *appsv1.Deployment:
   191  				return workloadWeight
   192  			default:
   193  				return defaultWeight
   194  			}
   195  		}
   196  		return getWeight(v1) <= getWeight(v2)
   197  	}
   198  	err := p.dag.WalkReverseTopoOrder(p.walkFunc, less)
   199  	if err != nil {
   200  		if hErr := p.handlePlanExecutionError(err); hErr != nil {
   201  			return hErr
   202  		}
   203  	}
   204  	return err
   205  }
   206  
   207  func (p *clusterPlan) handlePlanExecutionError(err error) error {
   208  	clusterCopy := p.transCtx.OrigCluster.DeepCopy()
   209  	condition := newFailedApplyResourcesCondition(err)
   210  	meta.SetStatusCondition(&clusterCopy.Status.Conditions, condition)
   211  	sendWarningEventWithError(p.transCtx.GetRecorder(), clusterCopy, ReasonApplyResourcesFailed, err)
   212  	return p.cli.Status().Patch(p.transCtx.Context, clusterCopy, client.MergeFrom(p.transCtx.OrigCluster))
   213  }
   214  
// Do the real work
   216  
   217  // NewClusterPlanBuilder returns a clusterPlanBuilder powered PlanBuilder
   218  func NewClusterPlanBuilder(ctx intctrlutil.RequestCtx, cli client.Client, req ctrl.Request) graph.PlanBuilder {
   219  	return &clusterPlanBuilder{
   220  		req: req,
   221  		cli: cli,
   222  		transCtx: &clusterTransformContext{
   223  			Context:       ctx.Ctx,
   224  			Client:        model.NewGraphClient(cli),
   225  			EventRecorder: ctx.Recorder,
   226  			Logger:        ctx.Log,
   227  		},
   228  	}
   229  }
   230  
   231  func (c *clusterPlanBuilder) defaultWalkFuncWithLogging(vertex graph.Vertex) error {
   232  	node, ok := vertex.(*model.ObjectVertex)
   233  	err := c.defaultWalkFunc(vertex)
   234  	switch {
   235  	case err == nil:
   236  		return err
   237  	case !ok:
   238  		c.transCtx.Logger.Error(err, "")
   239  	case node.Action == nil:
   240  		c.transCtx.Logger.Error(err, fmt.Sprintf("%T", node))
   241  	case apierrors.IsConflict(err):
   242  		return err
   243  	default:
   244  		c.transCtx.Logger.Error(err, fmt.Sprintf("%s %T error", *node.Action, node.Obj))
   245  	}
   246  	return err
   247  }
   248  
   249  func (c *clusterPlanBuilder) defaultWalkFunc(vertex graph.Vertex) error {
   250  	node, ok := vertex.(*model.ObjectVertex)
   251  	if !ok {
   252  		return fmt.Errorf("wrong vertex type %v", vertex)
   253  	}
   254  	if node.Action == nil {
   255  		return errors.New("node action can't be nil")
   256  	}
   257  
   258  	// cluster object has more business to do, handle them here
   259  	if _, ok = node.Obj.(*appsv1alpha1.Cluster); ok {
   260  		if err := c.reconcileCluster(node); err != nil {
   261  			return err
   262  		}
   263  	}
   264  	return c.reconcileObject(node)
   265  }
   266  
   267  func (c *clusterPlanBuilder) reconcileObject(node *model.ObjectVertex) error {
   268  	switch *node.Action {
   269  	case model.CREATE:
   270  		err := c.cli.Create(c.transCtx.Context, node.Obj)
   271  		if err != nil && !apierrors.IsAlreadyExists(err) {
   272  			return err
   273  		}
   274  	case model.UPDATE:
   275  		err := c.cli.Update(c.transCtx.Context, node.Obj)
   276  		if err != nil && !apierrors.IsNotFound(err) {
   277  			return err
   278  		}
   279  	case model.PATCH:
   280  		patch := client.MergeFrom(node.OriObj)
   281  		if err := c.cli.Patch(c.transCtx.Context, node.Obj, patch); err != nil && !apierrors.IsNotFound(err) {
   282  			return err
   283  		}
   284  	case model.DELETE:
   285  		if controllerutil.RemoveFinalizer(node.Obj, constant.DBClusterFinalizerName) {
   286  			err := c.cli.Update(c.transCtx.Context, node.Obj)
   287  			if err != nil && !apierrors.IsNotFound(err) {
   288  				return err
   289  			}
   290  		}
   291  		// delete secondary objects
   292  		if _, ok := node.Obj.(*appsv1alpha1.Cluster); !ok {
   293  			err := intctrlutil.BackgroundDeleteObject(c.cli, c.transCtx.Context, node.Obj)
   294  			if err != nil && !apierrors.IsNotFound(err) {
   295  				return err
   296  			}
   297  		}
   298  	case model.STATUS:
   299  		patch := client.MergeFrom(node.OriObj)
   300  		if err := c.cli.Status().Patch(c.transCtx.Context, node.Obj, patch); err != nil {
   301  			return err
   302  		}
   303  		// handle condition and phase changing triggered events
   304  		if newCluster, ok := node.Obj.(*appsv1alpha1.Cluster); ok {
   305  			oldCluster, _ := node.OriObj.(*appsv1alpha1.Cluster)
   306  			c.emitConditionUpdatingEvent(oldCluster.Status.Conditions, newCluster.Status.Conditions)
   307  			c.emitStatusUpdatingEvent(oldCluster.Status, newCluster.Status)
   308  		}
   309  	case model.NOOP:
   310  		// nothing
   311  	}
   312  	return nil
   313  }
   314  
   315  func (c *clusterPlanBuilder) reconcileCluster(node *model.ObjectVertex) error {
   316  	cluster := node.Obj.(*appsv1alpha1.Cluster).DeepCopy()
   317  	origCluster := node.OriObj.(*appsv1alpha1.Cluster)
   318  	switch *node.Action {
   319  	// cluster.meta and cluster.spec might change
   320  	case model.STATUS:
   321  		if !reflect.DeepEqual(cluster.ObjectMeta, origCluster.ObjectMeta) || !reflect.DeepEqual(cluster.Spec, origCluster.Spec) {
   322  			patch := client.MergeFrom(origCluster.DeepCopy())
   323  			if err := c.cli.Patch(c.transCtx.Context, cluster, patch); err != nil {
   324  				return err
   325  			}
   326  		}
   327  	case model.CREATE, model.UPDATE:
   328  		return fmt.Errorf("cluster can't be created or updated: %s", cluster.Name)
   329  	}
   330  	return nil
   331  }
   332  
   333  func (c *clusterPlanBuilder) emitConditionUpdatingEvent(oldConditions, newConditions []metav1.Condition) {
   334  	for _, newCondition := range newConditions {
   335  		oldCondition := meta.FindStatusCondition(oldConditions, newCondition.Type)
   336  		// filtered in cluster creation
   337  		if oldCondition == nil && newCondition.Status == metav1.ConditionFalse {
   338  			return
   339  		}
   340  		if !reflect.DeepEqual(oldCondition, &newCondition) {
   341  			eType := corev1.EventTypeNormal
   342  			if newCondition.Status == metav1.ConditionFalse {
   343  				eType = corev1.EventTypeWarning
   344  			}
   345  			c.transCtx.EventRecorder.Event(c.transCtx.Cluster, eType, newCondition.Reason, newCondition.Message)
   346  		}
   347  	}
   348  }
   349  
   350  func (c *clusterPlanBuilder) emitStatusUpdatingEvent(oldStatus, newStatus appsv1alpha1.ClusterStatus) {
   351  	cluster := c.transCtx.Cluster
   352  	newPhase := newStatus.Phase
   353  	if newPhase == oldStatus.Phase {
   354  		return
   355  	}
   356  	eType := corev1.EventTypeNormal
   357  	message := ""
   358  	switch newPhase {
   359  	case appsv1alpha1.RunningClusterPhase:
   360  		message = fmt.Sprintf("Cluster: %s is ready, current phase is %s", cluster.Name, newPhase)
   361  	case appsv1alpha1.StoppedClusterPhase:
   362  		message = fmt.Sprintf("Cluster: %s stopped successfully.", cluster.Name)
   363  	case appsv1alpha1.FailedClusterPhase, appsv1alpha1.AbnormalClusterPhase:
   364  		message = fmt.Sprintf("Cluster: %s is %s, check according to the components message", cluster.Name, newPhase)
   365  		eType = corev1.EventTypeWarning
   366  	}
   367  	if len(message) > 0 {
   368  		c.transCtx.EventRecorder.Event(cluster, eType, string(newPhase), message)
   369  	}
   370  }