github.com/minio/madmin-go/v3@v3.0.51/cluster-health.go (about)

     1  //
     2  // Copyright (c) 2015-2022 MinIO, Inc.
     3  //
     4  // This file is part of MinIO Object Storage stack
     5  //
     6  // This program is free software: you can redistribute it and/or modify
     7  // it under the terms of the GNU Affero General Public License as
     8  // published by the Free Software Foundation, either version 3 of the
     9  // License, or (at your option) any later version.
    10  //
    11  // This program is distributed in the hope that it will be useful,
    12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  // GNU Affero General Public License for more details.
    15  //
    16  // You should have received a copy of the GNU Affero General Public License
    17  // along with this program. If not, see <http://www.gnu.org/licenses/>.
    18  //
    19  
    20  package madmin
    21  
    22  import (
    23  	"context"
    24  	"net/http"
    25  	"net/http/httptrace"
    26  	"net/url"
    27  	"strconv"
    28  	"sync"
    29  	"time"
    30  )
    31  
// Response headers, endpoints and query parameters used by the anonymous
// cluster health checks in this file.
const (
	// Response header whose integer value is copied into HealthResult.WriteQuorum.
	minioWriteQuorumHeader     = "x-minio-write-quorum"
	// Response header whose integer value is copied into HealthResult.HealingDrives.
	minIOHealingDrives         = "x-minio-healing-drives"
	// Relative path queried by clusterCheck.
	clusterCheckEndpoint       = "/minio/health/cluster"
	// Relative path queried by clusterReadCheck.
	clusterReadCheckEndpoint   = "/minio/health/cluster/read"
	// Query parameter set to "true" by clusterCheck when a maintenance check
	// is requested. NOTE(review): the identifier is misspelled ("maintanence");
	// it is left as is because other code refers to it by this name.
	maintanenceURLParameterKey = "maintenance"
)
    39  
// HealthResult represents the cluster health result returned by
// AnonymousClient.Healthy.
type HealthResult struct {
	// Healthy is true when the health endpoint answered with HTTP 200 OK.
	Healthy         bool
	// MaintenanceMode is true when the cluster check answered with
	// HTTP 412 Precondition Failed. It is never set by the cluster read check.
	MaintenanceMode bool
	// WriteQuorum is the integer value of the x-minio-write-quorum response
	// header of the cluster check; it stays 0 when the header is absent.
	WriteQuorum     int
	// HealingDrives is the integer value of the x-minio-healing-drives response
	// header of the cluster check; it stays 0 when the header is absent.
	HealingDrives   int
}
    47  
// HealthOpts represents the input options for the health check
// performed by AnonymousClient.Healthy.
type HealthOpts struct {
	// ClusterRead selects the cluster read check (`/minio/health/cluster/read`)
	// instead of the default cluster check (`/minio/health/cluster`).
	ClusterRead bool
	// Maintenance adds the `maintenance=true` query parameter to the cluster
	// check. It is not used when ClusterRead is set.
	Maintenance bool
}
    53  
    54  // Healthy will hit `/minio/health/cluster` and `/minio/health/cluster/ready` anonymous APIs to check the cluster health
    55  func (an *AnonymousClient) Healthy(ctx context.Context, opts HealthOpts) (result HealthResult, err error) {
    56  	if opts.ClusterRead {
    57  		return an.clusterReadCheck(ctx)
    58  	}
    59  	return an.clusterCheck(ctx, opts.Maintenance)
    60  }
    61  
    62  func (an *AnonymousClient) clusterCheck(ctx context.Context, maintenance bool) (result HealthResult, err error) {
    63  	urlValues := make(url.Values)
    64  	if maintenance {
    65  		urlValues.Set(maintanenceURLParameterKey, "true")
    66  	}
    67  
    68  	resp, err := an.executeMethod(ctx, http.MethodGet, requestData{
    69  		relPath:     clusterCheckEndpoint,
    70  		queryValues: urlValues,
    71  	}, nil)
    72  	defer closeResponse(resp)
    73  	if err != nil {
    74  		return result, err
    75  	}
    76  
    77  	if resp != nil {
    78  		writeQuorumStr := resp.Header.Get(minioWriteQuorumHeader)
    79  		if writeQuorumStr != "" {
    80  			result.WriteQuorum, err = strconv.Atoi(writeQuorumStr)
    81  			if err != nil {
    82  				return result, err
    83  			}
    84  		}
    85  		healingDrivesStr := resp.Header.Get(minIOHealingDrives)
    86  		if healingDrivesStr != "" {
    87  			result.HealingDrives, err = strconv.Atoi(healingDrivesStr)
    88  			if err != nil {
    89  				return result, err
    90  			}
    91  		}
    92  		switch resp.StatusCode {
    93  		case http.StatusOK:
    94  			result.Healthy = true
    95  		case http.StatusPreconditionFailed:
    96  			result.MaintenanceMode = true
    97  		default:
    98  			// Not Healthy
    99  		}
   100  	}
   101  	return result, nil
   102  }
   103  
   104  func (an *AnonymousClient) clusterReadCheck(ctx context.Context) (result HealthResult, err error) {
   105  	resp, err := an.executeMethod(ctx, http.MethodGet, requestData{
   106  		relPath: clusterReadCheckEndpoint,
   107  	}, nil)
   108  	defer closeResponse(resp)
   109  	if err != nil {
   110  		return result, err
   111  	}
   112  
   113  	if resp != nil {
   114  		switch resp.StatusCode {
   115  		case http.StatusOK:
   116  			result.Healthy = true
   117  		default:
   118  			// Not Healthy
   119  		}
   120  	}
   121  	return result, nil
   122  }
   123  
// AliveOpts customizes the liveness check performed by AnonymousClient.Alive.
type AliveOpts struct {
	// Readiness sends the request to /minio/health/ready instead of the
	// default /minio/health/live.
	Readiness bool // send request to /minio/health/ready
}
   128  
// AliveResult reports the outcome of probing one server's health endpoint
// (/minio/health/live, or /minio/health/ready when readiness is requested),
// including the time spent getting a response back from the server.
type AliveResult struct {
	// Endpoint is the URL that was probed. It is left nil when the server's
	// endpoint could not be parsed into a URL.
	Endpoint       *url.URL      `json:"endpoint"`
	// ResponseTime is the time from acquiring a connection to the first
	// response byte, with the DNS resolution time subtracted.
	ResponseTime   time.Duration `json:"responseTime"`
	// DNSResolveTime is the time between the start and the end of the DNS lookup.
	DNSResolveTime time.Duration `json:"dnsResolveTime"`
	// Online is true when the server answered with HTTP 200 and did not report
	// "offline" in the x-minio-server-status response header.
	Online         bool          `json:"online"` // captures x-minio-server-status
	// Error is the URL parsing or request error, if any.
	Error          error         `json:"error"`
}
   138  
   139  // Alive will hit `/minio/health/live` to check if server is reachable, optionally returns
   140  // the amount of time spent getting a response back from the server.
   141  func (an *AnonymousClient) Alive(ctx context.Context, opts AliveOpts, servers ...ServerProperties) (resultsCh chan AliveResult) {
   142  	resource := "/minio/health/live"
   143  	if opts.Readiness {
   144  		resource = "/minio/health/ready"
   145  	}
   146  
   147  	scheme := "http"
   148  	if an.endpointURL != nil {
   149  		scheme = an.endpointURL.Scheme
   150  	}
   151  
   152  	resultsCh = make(chan AliveResult)
   153  	go func() {
   154  		defer close(resultsCh)
   155  		if len(servers) == 0 {
   156  			an.alive(ctx, an.endpointURL, resource, resultsCh)
   157  		} else {
   158  			var wg sync.WaitGroup
   159  			wg.Add(len(servers))
   160  			for _, server := range servers {
   161  				server := server
   162  				go func() {
   163  					defer wg.Done()
   164  					sscheme := server.Scheme
   165  					if sscheme == "" {
   166  						sscheme = scheme
   167  					}
   168  					u, err := url.Parse(sscheme + "://" + server.Endpoint)
   169  					if err != nil {
   170  						resultsCh <- AliveResult{
   171  							Error: err,
   172  						}
   173  						return
   174  					}
   175  					an.alive(ctx, u, resource, resultsCh)
   176  				}()
   177  			}
   178  			wg.Wait()
   179  		}
   180  	}()
   181  
   182  	return resultsCh
   183  }
   184  
   185  func (an *AnonymousClient) alive(ctx context.Context, u *url.URL, resource string, resultsCh chan AliveResult) {
   186  	var (
   187  		dnsStartTime, dnsDoneTime   time.Time
   188  		reqStartTime, firstByteTime time.Time
   189  	)
   190  
   191  	trace := &httptrace.ClientTrace{
   192  		DNSStart: func(_ httptrace.DNSStartInfo) {
   193  			dnsStartTime = time.Now()
   194  		},
   195  		DNSDone: func(_ httptrace.DNSDoneInfo) {
   196  			dnsDoneTime = time.Now()
   197  		},
   198  		GetConn: func(_ string) {
   199  			// GetConn is called again when trace is ON
   200  			// https://github.com/golang/go/issues/44281
   201  			if reqStartTime.IsZero() {
   202  				reqStartTime = time.Now()
   203  			}
   204  		},
   205  		GotFirstResponseByte: func() {
   206  			firstByteTime = time.Now()
   207  		},
   208  	}
   209  
   210  	resp, err := an.executeMethod(ctx, http.MethodGet, requestData{
   211  		relPath:          resource,
   212  		endpointOverride: u,
   213  	}, trace)
   214  	closeResponse(resp)
   215  	var respTime time.Duration
   216  	if firstByteTime.IsZero() {
   217  		respTime = time.Since(reqStartTime)
   218  	} else {
   219  		respTime = firstByteTime.Sub(reqStartTime) - dnsDoneTime.Sub(dnsStartTime)
   220  	}
   221  
   222  	result := AliveResult{
   223  		Endpoint:       u,
   224  		ResponseTime:   respTime,
   225  		DNSResolveTime: dnsDoneTime.Sub(dnsStartTime),
   226  	}
   227  	if err != nil {
   228  		result.Error = err
   229  	} else {
   230  		result.Online = resp.StatusCode == http.StatusOK && resp.Header.Get("x-minio-server-status") != "offline"
   231  	}
   232  
   233  	select {
   234  	case <-ctx.Done():
   235  		return
   236  	case resultsCh <- result:
   237  	}
   238  }