go.etcd.io/etcd@v3.3.27+incompatible/etcdctl/ctlv2/command/cluster_health.go (about)

     1  // Copyright 2015 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package command
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"io/ioutil"
    22  	"net/http"
    23  	"os"
    24  	"os/signal"
    25  	"time"
    26  
    27  	"github.com/coreos/etcd/client"
    28  
    29  	"github.com/urfave/cli"
    30  )
    31  
    32  func NewClusterHealthCommand() cli.Command {
    33  	return cli.Command{
    34  		Name:      "cluster-health",
    35  		Usage:     "check the health of the etcd cluster",
    36  		ArgsUsage: " ",
    37  		Flags: []cli.Flag{
    38  			cli.BoolFlag{Name: "forever, f", Usage: "forever check the health every 10 second until CTRL+C"},
    39  		},
    40  		Action: handleClusterHealth,
    41  	}
    42  }
    43  
    44  func handleClusterHealth(c *cli.Context) error {
    45  	forever := c.Bool("forever")
    46  	if forever {
    47  		sigch := make(chan os.Signal, 1)
    48  		signal.Notify(sigch, os.Interrupt)
    49  
    50  		go func() {
    51  			<-sigch
    52  			os.Exit(0)
    53  		}()
    54  	}
    55  
    56  	tr, err := getTransport(c)
    57  	if err != nil {
    58  		handleError(c, ExitServerError, err)
    59  	}
    60  
    61  	hc := http.Client{
    62  		Transport: tr,
    63  	}
    64  
    65  	cln := mustNewClientNoSync(c)
    66  	mi := client.NewMembersAPI(cln)
    67  	ms, err := mi.List(context.TODO())
    68  	if err != nil {
    69  		fmt.Println("cluster may be unhealthy: failed to list members")
    70  		handleError(c, ExitServerError, err)
    71  	}
    72  
    73  	for {
    74  		healthyMembers := 0
    75  		for _, m := range ms {
    76  			if len(m.ClientURLs) == 0 {
    77  				fmt.Printf("member %s is unreachable: no available published client urls\n", m.ID)
    78  				continue
    79  			}
    80  
    81  			checked := false
    82  			for _, url := range m.ClientURLs {
    83  				resp, err := hc.Get(url + "/health")
    84  				if err != nil {
    85  					fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
    86  					continue
    87  				}
    88  
    89  				result := struct{ Health string }{}
    90  				nresult := struct{ Health bool }{}
    91  				bytes, err := ioutil.ReadAll(resp.Body)
    92  				if err != nil {
    93  					fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
    94  					continue
    95  				}
    96  				resp.Body.Close()
    97  
    98  				err = json.Unmarshal(bytes, &result)
    99  				if err != nil {
   100  					err = json.Unmarshal(bytes, &nresult)
   101  				}
   102  				if err != nil {
   103  					fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
   104  					continue
   105  				}
   106  
   107  				checked = true
   108  				if result.Health == "true" || nresult.Health {
   109  					fmt.Printf("member %s is healthy: got healthy result from %s\n", m.ID, url)
   110  					healthyMembers++
   111  				} else {
   112  					fmt.Printf("member %s is unhealthy: got unhealthy result from %s\n", m.ID, url)
   113  				}
   114  				break
   115  			}
   116  			if !checked {
   117  				fmt.Printf("member %s is unreachable: %v are all unreachable\n", m.ID, m.ClientURLs)
   118  			}
   119  		}
   120  		switch healthyMembers {
   121  		case len(ms):
   122  			fmt.Println("cluster is healthy")
   123  		case 0:
   124  			fmt.Println("cluster is unavailable")
   125  		default:
   126  			fmt.Println("cluster is degraded")
   127  		}
   128  
   129  		if !forever {
   130  			if healthyMembers == len(ms) {
   131  				os.Exit(ExitSuccess)
   132  			}
   133  			os.Exit(ExitClusterNotHealthy)
   134  		}
   135  
   136  		fmt.Printf("\nnext check after 10 second...\n\n")
   137  		time.Sleep(10 * time.Second)
   138  	}
   139  }