github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/pkg/controller/rsm/pod_role_event_handler.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package rsm
    21  
    22  import (
    23  	"encoding/json"
    24  	"fmt"
    25  	"regexp"
    26  	"strings"
    27  	"time"
    28  
    29  	corev1 "k8s.io/api/core/v1"
    30  	"k8s.io/apimachinery/pkg/types"
    31  	"k8s.io/client-go/tools/record"
    32  	"sigs.k8s.io/controller-runtime/pkg/client"
    33  
    34  	workloads "github.com/1aal/kubeblocks/apis/workloads/v1alpha1"
    35  	"github.com/1aal/kubeblocks/pkg/common"
    36  	"github.com/1aal/kubeblocks/pkg/constant"
    37  	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
    38  )
    39  
    40  type PodRoleEventHandler struct{}
    41  
    42  // probeEventType defines the type of probe event.
    43  type probeEventType string
    44  
    45  const (
    46  	successEvent     = "Success"
    47  	roleChangedEvent = "roleChanged"
    48  )
    49  
    50  type probeMessage struct {
    51  	Event        probeEventType `json:"event,omitempty"`
    52  	Message      string         `json:"message,omitempty"`
    53  	OriginalRole string         `json:"originalRole,omitempty"`
    54  	Role         string         `json:"role,omitempty"`
    55  }
    56  
    57  const (
    58  	// roleChangedAnnotKey is used to mark the role change event has been handled.
    59  	roleChangedAnnotKey = "role.kubeblocks.io/event-handled"
    60  )
    61  
    62  var roleMessageRegex = regexp.MustCompile(`Readiness probe failed: .*({.*})`)
    63  
    64  func (h *PodRoleEventHandler) Handle(cli client.Client, reqCtx intctrlutil.RequestCtx, recorder record.EventRecorder, event *corev1.Event) error {
    65  	if event.InvolvedObject.FieldPath != readinessProbeEventFieldPath &&
    66  		event.InvolvedObject.FieldPath != legacyEventFieldPath &&
    67  		event.InvolvedObject.FieldPath != lorryEventFieldPath &&
    68  		event.Reason != checkRoleEventReason {
    69  		return nil
    70  	}
    71  	var (
    72  		err         error
    73  		annotations = event.GetAnnotations()
    74  	)
    75  	// filter role changed event that has been handled
    76  	count := fmt.Sprintf("count-%d", event.Count)
    77  	if annotations != nil && annotations[roleChangedAnnotKey] == count {
    78  		return nil
    79  	}
    80  
    81  	if _, err = handleRoleChangedEvent(cli, reqCtx, recorder, event); err != nil {
    82  		return err
    83  	}
    84  
    85  	// event order is crucial in role probing, but it's not guaranteed when controller restarted, so we have to mark them to be filtered
    86  	patch := client.MergeFrom(event.DeepCopy())
    87  	if event.Annotations == nil {
    88  		event.Annotations = make(map[string]string, 0)
    89  	}
    90  	event.Annotations[roleChangedAnnotKey] = count
    91  	return cli.Patch(reqCtx.Ctx, event, patch)
    92  }
    93  
    94  // handleRoleChangedEvent handles role changed event and return role.
    95  func handleRoleChangedEvent(cli client.Client, reqCtx intctrlutil.RequestCtx, recorder record.EventRecorder, event *corev1.Event) (string, error) {
    96  	// parse probe event message
    97  	message := parseProbeEventMessage(reqCtx, event)
    98  	if message == nil {
    99  		reqCtx.Log.Info("parse probe event message failed", "message", event.Message)
   100  		return "", nil
   101  	}
   102  
   103  	// if probe event operation is not impl, check role failed or role invalid, ignore it
   104  	if message.Event != successEvent && message.Event != roleChangedEvent {
   105  		reqCtx.Log.Info("probe event failed", "message", message.Message)
   106  		return "", nil
   107  	}
   108  	role := strings.ToLower(message.Role)
   109  
   110  	snapshot := parseGlobalRoleSnapshot(role, event)
   111  	for _, pair := range snapshot.PodRoleNamePairs {
   112  		podName := types.NamespacedName{
   113  			Namespace: event.InvolvedObject.Namespace,
   114  			Name:      pair.PodName,
   115  		}
   116  		// get pod
   117  		pod := &corev1.Pod{}
   118  		if err := cli.Get(reqCtx.Ctx, podName, pod); err != nil {
   119  			return pair.RoleName, err
   120  		}
   121  		// event belongs to old pod with the same name, ignore it
   122  		if pod.Name == pair.PodName && pod.UID != event.InvolvedObject.UID {
   123  			return pair.RoleName, nil
   124  		}
   125  
   126  		// compare the version of the current role snapshot with the last version recorded in the pod annotation,
   127  		// stale role snapshot will be ignored.
   128  		lastSnapshotVersion, ok := pod.Annotations[constant.LastRoleSnapshotVersionAnnotationKey]
   129  		if ok {
   130  
   131  			if snapshot.Version <= lastSnapshotVersion {
   132  				reqCtx.Log.Info("stale role snapshot received, ignore it", "snapshot", snapshot)
   133  				return pair.RoleName, nil
   134  			}
   135  		}
   136  
   137  		name, _ := intctrlutil.GetParentNameAndOrdinal(pod)
   138  		rsm := &workloads.ReplicatedStateMachine{}
   139  		if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: pod.Namespace, Name: name}, rsm); err != nil {
   140  			return "", err
   141  		}
   142  		reqCtx.Log.V(1).Info("handle role change event", "pod", pod.Name, "role", role, "originalRole", message.OriginalRole)
   143  
   144  		if err := updatePodRoleLabel(cli, reqCtx, *rsm, pod, pair.RoleName, snapshot.Version); err != nil {
   145  			return "", err
   146  		}
   147  	}
   148  	return role, nil
   149  }
   150  
   151  func parseGlobalRoleSnapshot(role string, event *corev1.Event) *common.GlobalRoleSnapshot {
   152  	snapshot := &common.GlobalRoleSnapshot{}
   153  	if err := json.Unmarshal([]byte(role), snapshot); err == nil {
   154  		return snapshot
   155  	}
   156  	snapshot.Version = event.EventTime.Time.Format(time.RFC3339Nano)
   157  	pair := common.PodRoleNamePair{
   158  		PodName:  event.InvolvedObject.Name,
   159  		RoleName: role,
   160  	}
   161  	snapshot.PodRoleNamePairs = append(snapshot.PodRoleNamePairs, pair)
   162  	return snapshot
   163  }
   164  
   165  // parseProbeEventMessage parses probe event message.
   166  func parseProbeEventMessage(reqCtx intctrlutil.RequestCtx, event *corev1.Event) *probeMessage {
   167  	message := &probeMessage{}
   168  
   169  	tryUnmarshalDirectAPIServerEvent := func() error {
   170  		return json.Unmarshal([]byte(event.Message), message)
   171  	}
   172  	tryUnmarshalReadinessProbeEvent := func() error {
   173  		matches := roleMessageRegex.FindStringSubmatch(event.Message)
   174  		if len(matches) != 2 {
   175  			reqCtx.Log.Info("parser Readiness probe event message failed", "message", event.Message)
   176  			return fmt.Errorf("parser Readiness probe event message failed: %s", event.Message)
   177  		}
   178  		msg := matches[1]
   179  		err := json.Unmarshal([]byte(msg), message)
   180  		if err != nil {
   181  			// not role related message, ignore it
   182  			reqCtx.Log.Info("not role message", "message", event.Message, "error", err)
   183  			return err
   184  		}
   185  		return nil
   186  	}
   187  
   188  	if err := tryUnmarshalDirectAPIServerEvent(); err == nil {
   189  		return message
   190  	}
   191  	if err := tryUnmarshalReadinessProbeEvent(); err == nil {
   192  		return message
   193  	}
   194  	return nil
   195  }