/*
Copyright (C) 2022-2023 ApeCloud Co., Ltd

This file is part of KubeBlocks project

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package operations

import (
	"encoding/json"
	"fmt"
	"reflect"
	"time"

	"github.com/pkg/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/record"
	"sigs.k8s.io/controller-runtime/pkg/client"

	appsv1alpha1 "github.com/1aal/kubeblocks/apis/apps/v1alpha1"
	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
)

// switchoverOpsHandler is the handler registered for switchover OpsRequests.
// It is an empty struct: all of its methods work from the arguments they are given.
type switchoverOpsHandler struct{}

// Compile-time assertion that switchoverOpsHandler (by value) implements OpsHandler.
var _ OpsHandler = switchoverOpsHandler{}

// SwitchoverMessage is the OpsRequest.Status.Condition.Message for switchover.
43 type SwitchoverMessage struct { 44 appsv1alpha1.Switchover 45 OldPrimary string 46 Cluster string 47 } 48 49 func init() { 50 switchoverBehaviour := OpsBehaviour{ 51 FromClusterPhases: appsv1alpha1.GetClusterUpRunningPhases(), 52 ToClusterPhase: appsv1alpha1.UpdatingClusterPhase, 53 OpsHandler: switchoverOpsHandler{}, 54 ProcessingReasonInClusterCondition: ProcessingReasonSwitchovering, 55 } 56 57 opsMgr := GetOpsManager() 58 opsMgr.RegisterOps(appsv1alpha1.SwitchoverType, switchoverBehaviour) 59 } 60 61 // ActionStartedCondition the started condition when handle the switchover request. 62 func (r switchoverOpsHandler) ActionStartedCondition(reqCtx intctrlutil.RequestCtx, cli client.Client, opsRes *OpsResource) (*metav1.Condition, error) { 63 switchoverMessageMap := make(map[string]SwitchoverMessage) 64 for _, switchover := range opsRes.OpsRequest.Spec.SwitchoverList { 65 pod, err := getPrimaryOrLeaderPod(reqCtx.Ctx, cli, *opsRes.Cluster, switchover.ComponentName, opsRes.Cluster.Spec.GetComponentDefRefName(switchover.ComponentName)) 66 if err != nil { 67 return nil, err 68 } 69 switchoverMessageMap[switchover.ComponentName] = SwitchoverMessage{ 70 Switchover: switchover, 71 OldPrimary: pod.Name, 72 Cluster: opsRes.Cluster.Name, 73 } 74 } 75 msg, err := json.Marshal(switchoverMessageMap) 76 if err != nil { 77 return nil, err 78 } 79 return appsv1alpha1.NewSwitchoveringCondition(opsRes.Cluster.Generation, string(msg)), nil 80 } 81 82 // Action to do the switchover operation. 83 func (r switchoverOpsHandler) Action(reqCtx intctrlutil.RequestCtx, cli client.Client, opsRes *OpsResource) error { 84 return doSwitchoverComponents(reqCtx, cli, opsRes, opsRes.OpsRequest.Spec.SwitchoverList) 85 } 86 87 // ReconcileAction will be performed when action is done and loops till OpsRequest.status.phase is Succeed/Failed. 88 // the Reconcile function for switchover opsRequest. 
89 func (r switchoverOpsHandler) ReconcileAction(reqCtx intctrlutil.RequestCtx, cli client.Client, opsRes *OpsResource) (appsv1alpha1.OpsPhase, time.Duration, error) { 90 var ( 91 opsRequestPhase = appsv1alpha1.OpsRunningPhase 92 ) 93 94 expectCount, actualCount, err := handleSwitchoverProgress(reqCtx, cli, opsRes) 95 if err != nil { 96 return "", 0, err 97 } 98 99 if expectCount == actualCount { 100 opsRequestPhase = appsv1alpha1.OpsSucceedPhase 101 } 102 103 return opsRequestPhase, time.Second, nil 104 } 105 106 // SaveLastConfiguration this operation only restart the pods of the component, no changes for Cluster.spec. 107 // empty implementation here. 108 func (r switchoverOpsHandler) SaveLastConfiguration(reqCtx intctrlutil.RequestCtx, cli client.Client, opsRes *OpsResource) error { 109 return nil 110 } 111 112 // doSwitchoverComponents creates the switchover job for each component. 113 func doSwitchoverComponents(reqCtx intctrlutil.RequestCtx, cli client.Client, opsRes *OpsResource, switchoverList []appsv1alpha1.Switchover) error { 114 var ( 115 opsRequest = opsRes.OpsRequest 116 oldOpsRequestStatus = opsRequest.Status.DeepCopy() 117 ) 118 patch := client.MergeFrom(opsRequest.DeepCopy()) 119 if opsRequest.Status.Components == nil { 120 opsRequest.Status.Components = make(map[string]appsv1alpha1.OpsRequestComponentStatus) 121 } 122 for _, switchover := range switchoverList { 123 compDef, err := appsv1alpha1.GetComponentDefByCluster(reqCtx.Ctx, cli, *opsRes.Cluster, switchover.ComponentName) 124 if err != nil { 125 return err 126 } 127 needSwitchover, err := needDoSwitchover(reqCtx.Ctx, cli, opsRes.Cluster, opsRes.Cluster.Spec.GetComponentByName(switchover.ComponentName), &switchover) 128 if err != nil { 129 return err 130 } 131 if !needSwitchover { 132 opsRequest.Status.Components[switchover.ComponentName] = appsv1alpha1.OpsRequestComponentStatus{ 133 Phase: appsv1alpha1.RunningClusterCompPhase, 134 Reason: OpsReasonForSkipSwitchover, 135 Message: 
fmt.Sprintf("This component %s is already in the expected state, skip the switchover operation", switchover.ComponentName), 136 ProgressDetails: []appsv1alpha1.ProgressStatusDetail{}, 137 } 138 continue 139 } else { 140 opsRequest.Status.Components[switchover.ComponentName] = appsv1alpha1.OpsRequestComponentStatus{ 141 Phase: appsv1alpha1.UpdatingClusterCompPhase, 142 ProgressDetails: []appsv1alpha1.ProgressStatusDetail{}, 143 } 144 } 145 if err := createSwitchoverJob(reqCtx, cli, opsRes.Cluster, opsRes.Cluster.Spec.GetComponentByName(switchover.ComponentName), compDef, &switchover); err != nil { 146 return err 147 } 148 } 149 if !reflect.DeepEqual(*oldOpsRequestStatus, opsRequest.Status) { 150 if err := cli.Status().Patch(reqCtx.Ctx, opsRequest, patch); err != nil { 151 return err 152 } 153 } 154 return nil 155 } 156 157 // handleSwitchoverProgress handles the component progressDetails during switchover. 158 // Returns: 159 // - expectCount: the expected count of switchover operations 160 // - completedCount: the number of completed switchover operations 161 // - error: any error that occurred during the handling 162 func handleSwitchoverProgress(reqCtx intctrlutil.RequestCtx, cli client.Client, opsRes *OpsResource) (int32, int32, error) { 163 var ( 164 expectCount = int32(len(opsRes.OpsRequest.Spec.SwitchoverList)) 165 completedCount int32 166 opsRequest = opsRes.OpsRequest 167 oldOpsRequestStatus = opsRequest.Status.DeepCopy() 168 compDef *appsv1alpha1.ClusterComponentDefinition 169 consistency bool 170 err error 171 ) 172 patch := client.MergeFrom(opsRequest.DeepCopy()) 173 succeedJobs := make([]string, 0, len(opsRes.OpsRequest.Spec.SwitchoverList)) 174 for _, switchover := range opsRequest.Spec.SwitchoverList { 175 switchoverCondition := meta.FindStatusCondition(opsRes.OpsRequest.Status.Conditions, appsv1alpha1.ConditionTypeSwitchover) 176 if switchoverCondition == nil { 177 err = errors.New("switchover condition is nil") 178 break 179 } 180 181 // if the 
component do not need switchover, skip it 182 reason := opsRequest.Status.Components[switchover.ComponentName].Reason 183 if reason == OpsReasonForSkipSwitchover { 184 completedCount += 1 185 continue 186 } 187 188 // check the current component switchoverJob whether succeed 189 jobName := genSwitchoverJobName(opsRes.Cluster.Name, switchover.ComponentName, switchoverCondition.ObservedGeneration) 190 checkJobProcessDetail := appsv1alpha1.ProgressStatusDetail{ 191 ObjectKey: getProgressObjectKey(SwitchoverCheckJobKey, jobName), 192 Status: appsv1alpha1.ProcessingProgressStatus, 193 } 194 if err = checkJobSucceed(reqCtx.Ctx, cli, opsRes.Cluster, jobName); err != nil { 195 checkJobProcessDetail.Message = fmt.Sprintf("switchover job %s is not succeed", jobName) 196 setComponentSwitchoverProgressDetails(reqCtx.Recorder, opsRequest, appsv1alpha1.UpdatingClusterCompPhase, checkJobProcessDetail, switchover.ComponentName) 197 continue 198 } else { 199 checkJobProcessDetail.Message = fmt.Sprintf("switchover job %s is succeed", jobName) 200 checkJobProcessDetail.Status = appsv1alpha1.SucceedProgressStatus 201 setComponentSwitchoverProgressDetails(reqCtx.Recorder, opsRequest, appsv1alpha1.UpdatingClusterCompPhase, checkJobProcessDetail, switchover.ComponentName) 202 } 203 204 // check the current component pod role label whether correct 205 checkRoleLabelProcessDetail := appsv1alpha1.ProgressStatusDetail{ 206 ObjectKey: getProgressObjectKey(SwitchoverCheckRoleLabelKey, switchover.ComponentName), 207 Status: appsv1alpha1.ProcessingProgressStatus, 208 Message: fmt.Sprintf("waiting for component %s pod role label consistency after switchover", switchover.ComponentName), 209 } 210 compDef, err = appsv1alpha1.GetComponentDefByCluster(reqCtx.Ctx, cli, *opsRes.Cluster, switchover.ComponentName) 211 if err != nil { 212 checkRoleLabelProcessDetail.Message = fmt.Sprintf("handleSwitchoverProgress get component %s definition failed", switchover.ComponentName) 213 
checkRoleLabelProcessDetail.Status = appsv1alpha1.FailedProgressStatus 214 setComponentSwitchoverProgressDetails(reqCtx.Recorder, opsRequest, appsv1alpha1.UpdatingClusterCompPhase, checkRoleLabelProcessDetail, switchover.ComponentName) 215 continue 216 } 217 consistency, err = checkPodRoleLabelConsistency(reqCtx.Ctx, cli, opsRes.Cluster, opsRes.Cluster.Spec.GetComponentByName(switchover.ComponentName), compDef, &switchover, switchoverCondition) 218 if err != nil { 219 checkRoleLabelProcessDetail.Message = fmt.Sprintf("waiting for component %s pod role label consistency after switchover", switchover.ComponentName) 220 setComponentSwitchoverProgressDetails(reqCtx.Recorder, opsRequest, appsv1alpha1.UpdatingClusterCompPhase, checkRoleLabelProcessDetail, switchover.ComponentName) 221 continue 222 } 223 224 if !consistency { 225 err = intctrlutil.NewErrorf(intctrlutil.ErrorWaitCacheRefresh, "requeue to waiting for pod role label consistency.") 226 setComponentSwitchoverProgressDetails(reqCtx.Recorder, opsRequest, appsv1alpha1.UpdatingClusterCompPhase, checkRoleLabelProcessDetail, switchover.ComponentName) 227 continue 228 } else { 229 checkRoleLabelProcessDetail.Message = fmt.Sprintf("check component %s pod role label consistency after switchover is succeed", switchover.ComponentName) 230 checkRoleLabelProcessDetail.Status = appsv1alpha1.SucceedProgressStatus 231 setComponentSwitchoverProgressDetails(reqCtx.Recorder, opsRequest, appsv1alpha1.UpdatingClusterCompPhase, checkRoleLabelProcessDetail, switchover.ComponentName) 232 } 233 234 // component switchover is successful 235 completedCount += 1 236 succeedJobs = append(succeedJobs, jobName) 237 componentProcessDetail := appsv1alpha1.ProgressStatusDetail{ 238 ObjectKey: switchover.ComponentName, 239 Message: fmt.Sprintf("switchover job %s is succeed", jobName), 240 Status: appsv1alpha1.SucceedProgressStatus, 241 } 242 setComponentSwitchoverProgressDetails(reqCtx.Recorder, opsRequest, appsv1alpha1.RunningClusterCompPhase, 
componentProcessDetail, switchover.ComponentName) 243 } 244 245 opsRequest.Status.Progress = fmt.Sprintf("%d/%d", completedCount, expectCount) 246 // patch OpsRequest.status.components 247 if !reflect.DeepEqual(*oldOpsRequestStatus, opsRequest.Status) { 248 if err := cli.Status().Patch(reqCtx.Ctx, opsRequest, patch); err != nil { 249 return expectCount, 0, err 250 } 251 } 252 253 if err != nil { 254 return expectCount, completedCount, err 255 } 256 257 if completedCount == expectCount { 258 for _, jobName := range succeedJobs { 259 if err := cleanJobByName(reqCtx.Ctx, cli, opsRes.Cluster, jobName); err != nil { 260 reqCtx.Log.Error(err, "clean switchover job failed", "jobName", jobName) 261 return expectCount, completedCount, err 262 } 263 } 264 } 265 266 return expectCount, completedCount, nil 267 } 268 269 // setComponentSwitchoverProgressDetails sets component switchover progress details. 270 func setComponentSwitchoverProgressDetails(recorder record.EventRecorder, 271 opsRequest *appsv1alpha1.OpsRequest, 272 phase appsv1alpha1.ClusterComponentPhase, 273 processDetail appsv1alpha1.ProgressStatusDetail, 274 componentName string) { 275 componentProcessDetails := opsRequest.Status.Components[componentName].ProgressDetails 276 setComponentStatusProgressDetail(recorder, opsRequest, &componentProcessDetails, processDetail) 277 opsRequest.Status.Components[componentName] = appsv1alpha1.OpsRequestComponentStatus{ 278 Phase: phase, 279 ProgressDetails: componentProcessDetails, 280 } 281 }