github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/tools/validatormon/validatormon.go (about)

     1  package main
     2  
     3  import (
     4  	"errors"
     5  	"flag"
     6  	"log"
     7  	"strings"
     8  	"time"
     9  
    10  	stathat "github.com/stathat/go"
    11  )
    12  
    13  // this is a tool to monitor stellar validators.
    14  
    15  var nodes = map[string]string{
    16  	"keybase1": "GCWJKM4EGTGJUVSWUJDPCQEOEP5LHSOFKSA4HALBTOO4T4H3HCHOM6UX",
    17  	"keybase2": "GDKWELGJURRKXECG3HHFHXMRX64YWQPUHKCVRESOX3E5PM6DM4YXLZJM",
    18  	"keybase3": "GA35T3723UP2XJLC2H7MNL6VMKZZIFL2VW7XHMFFJKKIA2FJCYTLKFBW",
    19  	"sdf1":     "GCGB2S2KGYARPVIA37HYZXVRM2YZUEXA6S33ZU5BUDC6THSB62LZSTYH",
    20  	"sdf2":     "GCM6QMP3DLRPTAZW2UZPCPX2LF3SXWXKPMP3GKFZBDSF3QZGV2G5QSTK",
    21  	"sdf3":     "GABMKJM6I25XI4K7U6XWMULOUQIQ27BCTMLS6BYYSOWKTBUXVRJSXHYQ",
    22  }
    23  
    24  var kbNodes = []string{"keybase1", "keybase2", "keybase3"}
    25  var cmpNodes = []string{"sdf1", "sdf2", "sdf3"}
    26  
    27  type Status struct {
    28  	Node    string
    29  	Ledger  int
    30  	Phase   string
    31  	Missing []string
    32  }
    33  
    34  type StatusReader interface {
    35  	StatusRead(accountID string) (*Status, error)
    36  }
    37  
    38  func CompareLedger(sr StatusReader) int {
    39  	var maxLedger int
    40  
    41  	for _, n := range cmpNodes {
    42  		status, err := sr.StatusRead(nodes[n])
    43  		if err != nil {
    44  			log.Printf("StatusRead error for %s (%s): %s", n, nodes[n], err)
    45  			continue
    46  		}
    47  		if status.Ledger > maxLedger {
    48  			maxLedger = status.Ledger
    49  		}
    50  	}
    51  
    52  	return maxLedger
    53  }
    54  
    55  type Analysis struct {
    56  	LedgerDelta  int
    57  	Phase        string
    58  	MissingCount int
    59  	Ok           bool
    60  	LedgerBehind bool
    61  	BadPhase     bool
    62  }
    63  
    64  func AnalyzeNode(sr StatusReader, nodeName string) (*Analysis, error) {
    65  	cl := CompareLedger(sr)
    66  	if cl == 0 {
    67  		return nil, errors.New("all compare nodes returning 0 ledger")
    68  	}
    69  
    70  	status, err := sr.StatusRead(nodes[nodeName])
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  
    75  	var a Analysis
    76  	a.LedgerDelta = cl - status.Ledger
    77  	a.Phase = status.Phase
    78  	a.MissingCount = len(status.Missing)
    79  	/*
    80  		if a.LedgerDelta < 10 && a.Phase == "EXTERNALIZE" {
    81  			a.Ok = true
    82  		} else {
    83  			if a.LedgerDelta >= 10 {
    84  				a.LedgerBehind = true
    85  			}
    86  			if a.Phase != "EXTERNALIZE" {
    87  				a.BadPhase = true
    88  			}
    89  		}
    90  	*/
    91  	// phase looks like it can be all over the place...will just check ledger
    92  	if a.LedgerDelta < 10 {
    93  		a.Ok = true
    94  	} else {
    95  		a.LedgerBehind = true
    96  	}
    97  
    98  	return &a, nil
    99  }
   100  
   101  var shkey string
   102  
   103  func main() {
   104  	log.Printf("validatormon starting")
   105  	parseFlags()
   106  	analyzeNodes()
   107  	log.Printf("waiting until stat posts are complete")
   108  	stathat.WaitUntilFinished(30 * time.Second)
   109  	log.Printf("validatormon finished")
   110  }
   111  
   112  func parseFlags() {
   113  	flag.StringVar(&shkey, "shkey", "", "StatHat ezkey")
   114  	flag.Parse()
   115  	if shkey == "" {
   116  		log.Printf("no shkey provided, proceeding but no stats will be reported")
   117  	}
   118  }
   119  
   120  func analyzeNodes() {
   121  	sr := new(LocalReader)
   122  	for _, n := range kbNodes {
   123  		a, err := AnalyzeNode(sr, n)
   124  		if err != nil {
   125  			log.Printf("AnalyzeNode %s (%s) error: %s", n, nodes[n], err)
   126  			postCount("monitor error~total," + n)
   127  			continue
   128  		}
   129  
   130  		if a.Ok {
   131  			log.Printf("node %s is ok", n)
   132  			postCount("ok~total," + n)
   133  		} else {
   134  			log.Printf("node %s is not ok (%+v)", n, a)
   135  			pieces := []string{"total"}
   136  			if a.BadPhase {
   137  				pieces = append(pieces, "bad phase")
   138  			}
   139  			if a.LedgerBehind {
   140  				pieces = append(pieces, "ledger behind")
   141  			}
   142  			pieces = append(pieces, n)
   143  			postCount("not ok~" + strings.Join(pieces, ","))
   144  		}
   145  
   146  		log.Printf("node %s missing count: %d", n, a.MissingCount)
   147  		postValue("missing count~all,"+n, a.MissingCount)
   148  
   149  		log.Printf("node %s ledger delta: %d", n, a.LedgerDelta)
   150  		postValue("ledger delta~all,"+n, a.LedgerDelta)
   151  	}
   152  }
   153  
   154  const statPrefix = "stellar - validator - "
   155  
   156  func postCount(name string) {
   157  	if shkey == "" {
   158  		return
   159  	}
   160  	sname := statPrefix + name
   161  	if err := stathat.PostEZCountOne(sname, shkey); err != nil {
   162  		log.Printf("stathat post error: %s", err)
   163  	}
   164  }
   165  
   166  func postValue(name string, v int) {
   167  	if shkey == "" {
   168  		return
   169  	}
   170  	sname := statPrefix + name
   171  	if err := stathat.PostEZValue(sname, shkey, float64(v)); err != nil {
   172  		log.Printf("stathat post error: %s", err)
   173  	}
   174  }