vitess.io/vitess@v0.16.2/go/vt/vtorc/inst/analysis.go (about)

     1  /*
     2     Copyright 2015 Shlomi Noach, courtesy Booking.com
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package inst
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  	"strings"
    23  	"time"
    24  
    25  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    26  	"vitess.io/vitess/go/vt/vtorc/config"
    27  )
    28  
// AnalysisCode is a string-typed identifier for the main analysis result of an
// instance; it is the type of ReplicationAnalysis.Analysis.
type AnalysisCode string

// StructureAnalysisCode is a string-typed identifier for a structure-level
// warning; ReplicationAnalysis.StructureAnalysis holds a slice of these.
type StructureAnalysisCode string
    31  
// AnalysisCode values. Each constant's string value equals its identifier,
// except GraceFulPrimaryTakeover, whose value is spelled "GracefulPrimaryTakeover".
// NoProblem is the one code that AnalysisString leaves out of its output.
const (
	NoProblem                              AnalysisCode = "NoProblem"
	ClusterHasNoPrimary                    AnalysisCode = "ClusterHasNoPrimary"
	DeadPrimaryWithoutReplicas             AnalysisCode = "DeadPrimaryWithoutReplicas"
	DeadPrimary                            AnalysisCode = "DeadPrimary"
	DeadPrimaryAndReplicas                 AnalysisCode = "DeadPrimaryAndReplicas"
	DeadPrimaryAndSomeReplicas             AnalysisCode = "DeadPrimaryAndSomeReplicas"
	PrimaryHasPrimary                      AnalysisCode = "PrimaryHasPrimary"
	PrimaryIsReadOnly                      AnalysisCode = "PrimaryIsReadOnly"
	PrimarySemiSyncMustBeSet               AnalysisCode = "PrimarySemiSyncMustBeSet"
	PrimarySemiSyncMustNotBeSet            AnalysisCode = "PrimarySemiSyncMustNotBeSet"
	ReplicaIsWritable                      AnalysisCode = "ReplicaIsWritable"
	NotConnectedToPrimary                  AnalysisCode = "NotConnectedToPrimary"
	ConnectedToWrongPrimary                AnalysisCode = "ConnectedToWrongPrimary"
	ReplicationStopped                     AnalysisCode = "ReplicationStopped"
	ReplicaSemiSyncMustBeSet               AnalysisCode = "ReplicaSemiSyncMustBeSet"
	ReplicaSemiSyncMustNotBeSet            AnalysisCode = "ReplicaSemiSyncMustNotBeSet"
	UnreachablePrimaryWithLaggingReplicas  AnalysisCode = "UnreachablePrimaryWithLaggingReplicas"
	UnreachablePrimary                     AnalysisCode = "UnreachablePrimary"
	PrimarySingleReplicaNotReplicating     AnalysisCode = "PrimarySingleReplicaNotReplicating"
	PrimarySingleReplicaDead               AnalysisCode = "PrimarySingleReplicaDead"
	AllPrimaryReplicasNotReplicating       AnalysisCode = "AllPrimaryReplicasNotReplicating"
	AllPrimaryReplicasNotReplicatingOrDead AnalysisCode = "AllPrimaryReplicasNotReplicatingOrDead"
	LockedSemiSyncPrimaryHypothesis        AnalysisCode = "LockedSemiSyncPrimaryHypothesis"
	LockedSemiSyncPrimary                  AnalysisCode = "LockedSemiSyncPrimary"
	PrimaryWithoutReplicas                 AnalysisCode = "PrimaryWithoutReplicas"
	BinlogServerFailingToConnectToPrimary  AnalysisCode = "BinlogServerFailingToConnectToPrimary"
	GraceFulPrimaryTakeover                AnalysisCode = "GracefulPrimaryTakeover"
)
    61  
// StructureAnalysisCode values. Each constant's string value equals its
// identifier. AnalysisString appends these, in slice order, after the main
// analysis code.
const (
	StatementAndMixedLoggingReplicasStructureWarning     StructureAnalysisCode = "StatementAndMixedLoggingReplicasStructureWarning"
	StatementAndRowLoggingReplicasStructureWarning       StructureAnalysisCode = "StatementAndRowLoggingReplicasStructureWarning"
	MixedAndRowLoggingReplicasStructureWarning           StructureAnalysisCode = "MixedAndRowLoggingReplicasStructureWarning"
	MultipleMajorVersionsLoggingReplicasStructureWarning StructureAnalysisCode = "MultipleMajorVersionsLoggingReplicasStructureWarning"
	NoLoggingReplicasStructureWarning                    StructureAnalysisCode = "NoLoggingReplicasStructureWarning"
	DifferentGTIDModesStructureWarning                   StructureAnalysisCode = "DifferentGTIDModesStructureWarning"
	ErrantGTIDStructureWarning                           StructureAnalysisCode = "ErrantGTIDStructureWarning"
	NoFailoverSupportStructureWarning                    StructureAnalysisCode = "NoFailoverSupportStructureWarning"
	NoWriteablePrimaryStructureWarning                   StructureAnalysisCode = "NoWriteablePrimaryStructureWarning"
	NotEnoughValidSemiSyncReplicasStructureWarning       StructureAnalysisCode = "NotEnoughValidSemiSyncReplicasStructureWarning"
)
    74  
// InstanceAnalysis pairs an instance key with an analysis code. Both fields are
// unexported; values are built with NewInstanceAnalysis and rendered via String.
type InstanceAnalysis struct {
	key      *InstanceKey // instance the analysis refers to; dereferenced by String
	analysis AnalysisCode // analysis code attributed to that instance
}
    79  
    80  func NewInstanceAnalysis(instanceKey *InstanceKey, analysis AnalysisCode) *InstanceAnalysis {
    81  	return &InstanceAnalysis{
    82  		key:      instanceKey,
    83  		analysis: analysis,
    84  	}
    85  }
    86  
    87  func (instanceAnalysis *InstanceAnalysis) String() string {
    88  	return fmt.Sprintf("%s/%s", instanceAnalysis.key.StringCode(), string(instanceAnalysis.analysis))
    89  }
    90  
// PeerAnalysisMap indicates the number of peers agreeing on an analysis.
// The key of this map is an InstanceAnalysis.String() value.
type PeerAnalysisMap map[string]int
    94  
// ReplicationAnalysisHints is a set of boolean options for replication analysis.
// NOTE(review): the consumers of these flags are not in this file; the field
// names suggest they control whether downtimed and no-problem entries are
// included and whether the analysis is audited — confirm against callers.
type ReplicationAnalysisHints struct {
	IncludeDowntimed bool
	IncludeNoProblem bool
	AuditAnalysis    bool
}
   100  
// Command hint strings.
// NOTE(review): presumably these are the values carried in
// ReplicationAnalysis.CommandHint — confirm against callers.
const (
	ForcePrimaryFailoverCommandHint    string = "force-primary-failover"
	ForcePrimaryTakeoverCommandHint    string = "force-primary-takeover"
	GracefulPrimaryTakeoverCommandHint string = "graceful-primary-takeover"
)
   106  
// AnalysisInstanceType is a string-typed label for the role of an analyzed
// instance, as returned by ReplicationAnalysis.GetAnalysisInstanceType.
type AnalysisInstanceType string

// AnalysisInstanceType values.
const (
	AnalysisInstanceTypePrimary             AnalysisInstanceType = "primary"
	AnalysisInstanceTypeCoPrimary           AnalysisInstanceType = "co-primary"
	AnalysisInstanceTypeIntermediatePrimary AnalysisInstanceType = "intermediate-primary"
)
   114  
// ReplicationAnalysis notes analysis on replication chain status, per instance.
//
// Of the fields below, the methods in this file read only a few:
// AnalysisString reads Analysis and StructureAnalysis, and
// GetAnalysisInstanceType reads IsCoPrimary and IsPrimary.
type ReplicationAnalysis struct {
	AnalyzedInstanceKey                       InstanceKey
	AnalyzedInstancePrimaryKey                InstanceKey
	TabletType                                topodatapb.TabletType
	PrimaryTimeStamp                          time.Time
	ClusterDetails                            ClusterInfo
	AnalyzedInstanceDataCenter                string
	AnalyzedInstanceRegion                    string
	AnalyzedKeyspace                          string
	AnalyzedShard                             string
	AnalyzedInstancePhysicalEnvironment       string
	AnalyzedInstanceBinlogCoordinates         BinlogCoordinates
	IsPrimary                                 bool
	IsClusterPrimary                          bool
	IsCoPrimary                               bool
	LastCheckValid                            bool
	LastCheckPartialSuccess                   bool
	CountReplicas                             uint
	CountValidReplicas                        uint
	CountValidReplicatingReplicas             uint
	CountReplicasFailingToConnectToPrimary    uint
	CountDowntimedReplicas                    uint
	ReplicationDepth                          uint
	IsFailingToConnectToPrimary               bool
	ReplicationStopped                        bool
	Analysis                                  AnalysisCode
	Description                               string
	StructureAnalysis                         []StructureAnalysisCode
	IsDowntimed                               bool
	IsReplicasDowntimed                       bool // as good as downtimed because all replicas are downtimed AND analysis is all about the replicas (e.g. AllPrimaryReplicasNotReplicating)
	DowntimeEndTimestamp                      string
	DowntimeRemainingSeconds                  int
	IsBinlogServer                            bool
	OracleGTIDImmediateTopology               bool
	MariaDBGTIDImmediateTopology              bool
	BinlogServerImmediateTopology             bool
	SemiSyncPrimaryEnabled                    bool
	SemiSyncPrimaryStatus                     bool
	SemiSyncPrimaryWaitForReplicaCount        uint
	SemiSyncPrimaryClients                    uint
	SemiSyncReplicaEnabled                    bool
	CountSemiSyncReplicasEnabled              uint
	CountLoggingReplicas                      uint
	CountStatementBasedLoggingReplicas        uint
	CountMixedBasedLoggingReplicas            uint
	CountRowBasedLoggingReplicas              uint
	CountDistinctMajorVersionsLoggingReplicas uint
	CountDelayedReplicas                      uint
	CountLaggingReplicas                      uint
	IsActionableRecovery                      bool
	ProcessingNodeHostname                    string
	ProcessingNodeToken                       string
	CountAdditionalAgreeingNodes              int
	StartActivePeriod                         string
	SkippableDueToDowntime                    bool
	GTIDMode                                  string
	MinReplicaGTIDMode                        string
	MaxReplicaGTIDMode                        string
	MaxReplicaGTIDErrant                      string
	CommandHint                               string
	IsReadOnly                                bool
}
   178  
// AnalysisMap maps a string key to a *ReplicationAnalysis.
// NOTE(review): the key format is not established in this file — confirm against callers.
type AnalysisMap map[string](*ReplicationAnalysis)
   180  
// ReplicationAnalysisChangelog associates an analyzed instance key with a list
// of changelog entries, each stored as a string.
type ReplicationAnalysisChangelog struct {
	AnalyzedInstanceKey InstanceKey
	Changelog           []string
}
   185  
   186  func (replicationAnalysis *ReplicationAnalysis) MarshalJSON() ([]byte, error) {
   187  	i := struct {
   188  		ReplicationAnalysis
   189  	}{}
   190  	i.ReplicationAnalysis = *replicationAnalysis
   191  
   192  	return json.Marshal(i)
   193  }
   194  
   195  // AnalysisString returns a human friendly description of all analysis issues
   196  func (replicationAnalysis *ReplicationAnalysis) AnalysisString() string {
   197  	result := []string{}
   198  	if replicationAnalysis.Analysis != NoProblem {
   199  		result = append(result, string(replicationAnalysis.Analysis))
   200  	}
   201  	for _, structureAnalysis := range replicationAnalysis.StructureAnalysis {
   202  		result = append(result, string(structureAnalysis))
   203  	}
   204  	return strings.Join(result, ", ")
   205  }
   206  
   207  // Get a string description of the analyzed instance type (primary? co-primary? intermediate-primary?)
   208  func (replicationAnalysis *ReplicationAnalysis) GetAnalysisInstanceType() AnalysisInstanceType {
   209  	if replicationAnalysis.IsCoPrimary {
   210  		return AnalysisInstanceTypeCoPrimary
   211  	}
   212  
   213  	if replicationAnalysis.IsPrimary {
   214  		return AnalysisInstanceTypePrimary
   215  	}
   216  	return AnalysisInstanceTypeIntermediatePrimary
   217  }
   218  
   219  // ValidSecondsFromSeenToLastAttemptedCheck returns the maximum allowed elapsed time
   220  // between last_attempted_check to last_checked before we consider the instance as invalid.
   221  func ValidSecondsFromSeenToLastAttemptedCheck() uint {
   222  	return config.Config.InstancePollSeconds + 1
   223  }