github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/pkg/controller/rsm/pod_role_event_handler.go (about) 1 /* 2 Copyright (C) 2022-2023 ApeCloud Co., Ltd 3 4 This file is part of KubeBlocks project 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU Affero General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU Affero General Public License for more details. 15 16 You should have received a copy of the GNU Affero General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 package rsm 21 22 import ( 23 "encoding/json" 24 "fmt" 25 "regexp" 26 "strings" 27 "time" 28 29 corev1 "k8s.io/api/core/v1" 30 "k8s.io/apimachinery/pkg/types" 31 "k8s.io/client-go/tools/record" 32 "sigs.k8s.io/controller-runtime/pkg/client" 33 34 workloads "github.com/1aal/kubeblocks/apis/workloads/v1alpha1" 35 "github.com/1aal/kubeblocks/pkg/common" 36 "github.com/1aal/kubeblocks/pkg/constant" 37 intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil" 38 ) 39 40 type PodRoleEventHandler struct{} 41 42 // probeEventType defines the type of probe event. 43 type probeEventType string 44 45 const ( 46 successEvent = "Success" 47 roleChangedEvent = "roleChanged" 48 ) 49 50 type probeMessage struct { 51 Event probeEventType `json:"event,omitempty"` 52 Message string `json:"message,omitempty"` 53 OriginalRole string `json:"originalRole,omitempty"` 54 Role string `json:"role,omitempty"` 55 } 56 57 const ( 58 // roleChangedAnnotKey is used to mark the role change event has been handled. 59 roleChangedAnnotKey = "role.kubeblocks.io/event-handled" 60 ) 61 62 var roleMessageRegex = regexp.MustCompile(`Readiness probe failed: .*({.*})`) 63 64 func (h *PodRoleEventHandler) Handle(cli client.Client, reqCtx intctrlutil.RequestCtx, recorder record.EventRecorder, event *corev1.Event) error { 65 if event.InvolvedObject.FieldPath != readinessProbeEventFieldPath && 66 event.InvolvedObject.FieldPath != legacyEventFieldPath && 67 event.InvolvedObject.FieldPath != lorryEventFieldPath && 68 event.Reason != checkRoleEventReason { 69 return nil 70 } 71 var ( 72 err error 73 annotations = event.GetAnnotations() 74 ) 75 // filter role changed event that has been handled 76 count := fmt.Sprintf("count-%d", event.Count) 77 if annotations != nil && annotations[roleChangedAnnotKey] == count { 78 return nil 79 } 80 81 if _, err = handleRoleChangedEvent(cli, reqCtx, recorder, event); err != nil { 82 return err 83 } 84 85 // event order is crucial in role probing, but it's not guaranteed when controller restarted, so we have to mark them to be filtered 86 patch := client.MergeFrom(event.DeepCopy()) 87 if event.Annotations == nil { 88 event.Annotations = make(map[string]string, 0) 89 } 90 event.Annotations[roleChangedAnnotKey] = count 91 return cli.Patch(reqCtx.Ctx, event, patch) 92 } 93 94 // handleRoleChangedEvent handles role changed event and return role. 95 func handleRoleChangedEvent(cli client.Client, reqCtx intctrlutil.RequestCtx, recorder record.EventRecorder, event *corev1.Event) (string, error) { 96 // parse probe event message 97 message := parseProbeEventMessage(reqCtx, event) 98 if message == nil { 99 reqCtx.Log.Info("parse probe event message failed", "message", event.Message) 100 return "", nil 101 } 102 103 // if probe event operation is not impl, check role failed or role invalid, ignore it 104 if message.Event != successEvent && message.Event != roleChangedEvent { 105 reqCtx.Log.Info("probe event failed", "message", message.Message) 106 return "", nil 107 } 108 role := strings.ToLower(message.Role) 109 110 snapshot := parseGlobalRoleSnapshot(role, event) 111 for _, pair := range snapshot.PodRoleNamePairs { 112 podName := types.NamespacedName{ 113 Namespace: event.InvolvedObject.Namespace, 114 Name: pair.PodName, 115 } 116 // get pod 117 pod := &corev1.Pod{} 118 if err := cli.Get(reqCtx.Ctx, podName, pod); err != nil { 119 return pair.RoleName, err 120 } 121 // event belongs to old pod with the same name, ignore it 122 if pod.Name == pair.PodName && pod.UID != event.InvolvedObject.UID { 123 return pair.RoleName, nil 124 } 125 126 // compare the version of the current role snapshot with the last version recorded in the pod annotation, 127 // stale role snapshot will be ignored. 128 lastSnapshotVersion, ok := pod.Annotations[constant.LastRoleSnapshotVersionAnnotationKey] 129 if ok { 130 131 if snapshot.Version <= lastSnapshotVersion { 132 reqCtx.Log.Info("stale role snapshot received, ignore it", "snapshot", snapshot) 133 return pair.RoleName, nil 134 } 135 } 136 137 name, _ := intctrlutil.GetParentNameAndOrdinal(pod) 138 rsm := &workloads.ReplicatedStateMachine{} 139 if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: pod.Namespace, Name: name}, rsm); err != nil { 140 return "", err 141 } 142 reqCtx.Log.V(1).Info("handle role change event", "pod", pod.Name, "role", role, "originalRole", message.OriginalRole) 143 144 if err := updatePodRoleLabel(cli, reqCtx, *rsm, pod, pair.RoleName, snapshot.Version); err != nil { 145 return "", err 146 } 147 } 148 return role, nil 149 } 150 151 func parseGlobalRoleSnapshot(role string, event *corev1.Event) *common.GlobalRoleSnapshot { 152 snapshot := &common.GlobalRoleSnapshot{} 153 if err := json.Unmarshal([]byte(role), snapshot); err == nil { 154 return snapshot 155 } 156 snapshot.Version = event.EventTime.Time.Format(time.RFC3339Nano) 157 pair := common.PodRoleNamePair{ 158 PodName: event.InvolvedObject.Name, 159 RoleName: role, 160 } 161 snapshot.PodRoleNamePairs = append(snapshot.PodRoleNamePairs, pair) 162 return snapshot 163 } 164 165 // parseProbeEventMessage parses probe event message. 166 func parseProbeEventMessage(reqCtx intctrlutil.RequestCtx, event *corev1.Event) *probeMessage { 167 message := &probeMessage{} 168 169 tryUnmarshalDirectAPIServerEvent := func() error { 170 return json.Unmarshal([]byte(event.Message), message) 171 } 172 tryUnmarshalReadinessProbeEvent := func() error { 173 matches := roleMessageRegex.FindStringSubmatch(event.Message) 174 if len(matches) != 2 { 175 reqCtx.Log.Info("parser Readiness probe event message failed", "message", event.Message) 176 return fmt.Errorf("parser Readiness probe event message failed: %s", event.Message) 177 } 178 msg := matches[1] 179 err := json.Unmarshal([]byte(msg), message) 180 if err != nil { 181 // not role related message, ignore it 182 reqCtx.Log.Info("not role message", "message", event.Message, "error", err) 183 return err 184 } 185 return nil 186 } 187 188 if err := tryUnmarshalDirectAPIServerEvent(); err == nil { 189 return message 190 } 191 if err := tryUnmarshalReadinessProbeEvent(); err == nil { 192 return message 193 } 194 return nil 195 }