github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/pkg/lorry/operations/replica/checkrole.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package replica
    21  
    22  import (
    23  	"context"
    24  	"encoding/json"
    25  	"fmt"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/go-logr/logr"
    30  	"github.com/pkg/errors"
    31  	"github.com/spf13/viper"
    32  	ctrl "sigs.k8s.io/controller-runtime"
    33  
    34  	"github.com/1aal/kubeblocks/pkg/constant"
    35  	"github.com/1aal/kubeblocks/pkg/lorry/dcs"
    36  	"github.com/1aal/kubeblocks/pkg/lorry/engines/register"
    37  	"github.com/1aal/kubeblocks/pkg/lorry/operations"
    38  	"github.com/1aal/kubeblocks/pkg/lorry/util"
    39  )
    40  
// AccessMode defines SVC access mode enums.
// In this file it is the value type of CheckRole.DBRoles, i.e. the access
// mode associated with each configured role name.
// +enum
type AccessMode string
    44  
// CheckRole is the operation that probes the role of the local database
// replica and reports role changes (or repeated probe failures) as events.
//
// Fields:
//   - logger: named logger ("checkrole") assigned in Init.
//   - dcsStore: DCS store obtained in Init; Do reads the cached cluster from it.
//   - OriRole: the role most recently reported by Do; it is overwritten whenever
//     a different valid role is detected.
//   - CheckRoleFailedCount: number of consecutive failed role queries; reset to
//     zero by Do after a successful query.
//   - FailedEventReportFrequency: a failure event is sent when
//     CheckRoleFailedCount is a multiple of this value; Init keeps it within
//     [300, 3600].
//   - ProbeTimeout: deadline applied to a single role query in Do.
//   - DBRoles: roles decoded from the KB_SERVICE_ROLES setting in Init; when
//     non-empty, only roles whose name matches a key are reported.
type CheckRole struct {
	operations.Base
	logger                     logr.Logger
	dcsStore                   dcs.DCS
	OriRole                    string
	CheckRoleFailedCount       int
	FailedEventReportFrequency int
	ProbeTimeout               time.Duration
	DBRoles                    map[string]AccessMode
}
    55  
// checkrole is the package-level CheckRole instance that init registers with
// the operations registry.
var checkrole operations.Operation = &CheckRole{}
    57  
    58  func init() {
    59  	err := operations.Register(strings.ToLower(string(util.CheckRoleOperation)), checkrole)
    60  	if err != nil {
    61  		panic(err.Error())
    62  	}
    63  }
    64  
    65  func (s *CheckRole) Init(ctx context.Context) error {
    66  	s.dcsStore = dcs.GetStore()
    67  	if s.dcsStore == nil {
    68  		return errors.New("dcs store init failed")
    69  	}
    70  
    71  	val := viper.GetString("KB_SERVICE_ROLES")
    72  	if val != "" {
    73  		if err := json.Unmarshal([]byte(val), &s.DBRoles); err != nil {
    74  			fmt.Println(errors.Wrap(err, "KB_DB_ROLES env format error").Error())
    75  		}
    76  	}
    77  
    78  	s.FailedEventReportFrequency = viper.GetInt("KB_FAILED_EVENT_REPORT_FREQUENCY")
    79  	if s.FailedEventReportFrequency < 300 {
    80  		s.FailedEventReportFrequency = 300
    81  	} else if s.FailedEventReportFrequency > 3600 {
    82  		s.FailedEventReportFrequency = 3600
    83  	}
    84  
    85  	timeoutSeconds := util.DefaultProbeTimeoutSeconds
    86  	if viper.IsSet(constant.KBEnvRoleProbeTimeout) {
    87  		timeoutSeconds = viper.GetInt(constant.KBEnvRoleProbeTimeout)
    88  	}
    89  	// lorry utilizes the pod readiness probe to trigger role probe and 'timeoutSeconds' is directly copied from the 'probe.timeoutSeconds' field of pod.
    90  	// here we give 80% of the total time to role probe job and leave the remaining 20% to kubelet to handle the readiness probe related tasks.
    91  	s.ProbeTimeout = time.Duration(timeoutSeconds) * (800 * time.Millisecond)
    92  	s.logger = ctrl.Log.WithName("checkrole")
    93  	return nil
    94  }
    95  
// IsReadonly always reports true for the check-role operation; ctx is unused.
func (s *CheckRole) IsReadonly(ctx context.Context) bool {
	return true
}
    99  
   100  func (s *CheckRole) Do(ctx context.Context, req *operations.OpsRequest) (*operations.OpsResponse, error) {
   101  	manager, err := register.GetDBManager()
   102  	if err != nil {
   103  		return nil, errors.Wrap(err, "get manager failed")
   104  	}
   105  
   106  	resp := &operations.OpsResponse{
   107  		Data: map[string]any{},
   108  	}
   109  	resp.Data["operation"] = util.CheckRoleOperation
   110  	resp.Data["originalRole"] = s.OriRole
   111  
   112  	if !manager.IsDBStartupReady() {
   113  		resp.Data["message"] = "db not ready"
   114  		return resp, nil
   115  	}
   116  
   117  	cluster := s.dcsStore.GetClusterFromCache()
   118  
   119  	ctx1, cancel := context.WithTimeout(ctx, s.ProbeTimeout)
   120  	defer cancel()
   121  	role, err := manager.GetReplicaRole(ctx1, cluster)
   122  	if err != nil {
   123  		s.logger.Info("executing checkRole error", "error", err)
   124  		// do not return err, as it will cause readinessprobe to fail
   125  		err = nil
   126  		if s.CheckRoleFailedCount%s.FailedEventReportFrequency == 0 {
   127  			s.logger.Info("role checks failed continuously", "times", s.CheckRoleFailedCount)
   128  			// if err is not nil, send event through kubelet readinessprobe
   129  			err = util.SentEventForProbe(ctx, resp.Data)
   130  		}
   131  		s.CheckRoleFailedCount++
   132  		return resp, err
   133  	}
   134  
   135  	s.CheckRoleFailedCount = 0
   136  	if isValid, message := s.roleValidate(role); !isValid {
   137  		resp.Data["message"] = message
   138  		return resp, nil
   139  	}
   140  
   141  	resp.Data["role"] = role
   142  	if s.OriRole == role {
   143  		return nil, nil
   144  	}
   145  	resp.Data["event"] = util.OperationSuccess
   146  	s.OriRole = role
   147  	err = util.SentEventForProbe(ctx, resp.Data)
   148  	return resp, err
   149  }
   150  
   151  // Component may have some internal roles that needn't be exposed to end user,
   152  // and not configured in cluster definition, e.g. ETCD's Candidate.
   153  // roleValidate is used to filter the internal roles and decrease the number
   154  // of report events to reduce the possibility of event conflicts.
   155  func (s *CheckRole) roleValidate(role string) (bool, string) {
   156  	// do not validate them when db roles setting is missing
   157  	if len(s.DBRoles) == 0 {
   158  		return true, ""
   159  	}
   160  
   161  	var msg string
   162  	isValid := false
   163  	for r := range s.DBRoles {
   164  		if strings.EqualFold(r, role) {
   165  			isValid = true
   166  			break
   167  		}
   168  	}
   169  	if !isValid {
   170  		msg = fmt.Sprintf("role %s is not configured in cluster definition %v", role, s.DBRoles)
   171  	}
   172  	return isValid, msg
   173  }