github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/bootstrap-peer-server.go (about)

     1  // Copyright (c) 2015-2022 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"math/rand"
    25  	"reflect"
    26  	"strings"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/minio/minio-go/v7/pkg/set"
    31  	"github.com/minio/minio/internal/grid"
    32  	"github.com/minio/minio/internal/logger"
    33  	"github.com/minio/pkg/v2/env"
    34  )
    35  
    36  // To abstract a node over network.
    37  type bootstrapRESTServer struct{}
    38  
    39  //go:generate msgp -file=$GOFILE
    40  
    41  // ServerSystemConfig - captures information about server configuration.
    42  type ServerSystemConfig struct {
    43  	NEndpoints int
    44  	CmdLines   []string
    45  	MinioEnv   map[string]string
    46  }
    47  
    48  // Diff - returns error on first difference found in two configs.
    49  func (s1 *ServerSystemConfig) Diff(s2 *ServerSystemConfig) error {
    50  	ns1 := s1.NEndpoints
    51  	ns2 := s2.NEndpoints
    52  	if ns1 != ns2 {
    53  		return fmt.Errorf("Expected number of endpoints %d, seen %d", ns1, ns2)
    54  	}
    55  
    56  	for i, cmdLine := range s1.CmdLines {
    57  		if cmdLine != s2.CmdLines[i] {
    58  			return fmt.Errorf("Expected command line argument %s, seen %s", cmdLine,
    59  				s2.CmdLines[i])
    60  		}
    61  	}
    62  
    63  	if reflect.DeepEqual(s1.MinioEnv, s2.MinioEnv) {
    64  		return nil
    65  	}
    66  
    67  	// Report differences in environment variables.
    68  	var missing []string
    69  	var mismatching []string
    70  	for k, v := range s1.MinioEnv {
    71  		ev, ok := s2.MinioEnv[k]
    72  		if !ok {
    73  			missing = append(missing, k)
    74  		} else if v != ev {
    75  			mismatching = append(mismatching, k)
    76  		}
    77  	}
    78  	var extra []string
    79  	for k := range s2.MinioEnv {
    80  		_, ok := s1.MinioEnv[k]
    81  		if !ok {
    82  			extra = append(extra, k)
    83  		}
    84  	}
    85  	msg := "Expected same MINIO_ environment variables and values across all servers: "
    86  	if len(missing) > 0 {
    87  		msg += fmt.Sprintf(`Missing environment values: %v. `, missing)
    88  	}
    89  	if len(mismatching) > 0 {
    90  		msg += fmt.Sprintf(`Mismatching environment values: %v. `, mismatching)
    91  	}
    92  	if len(extra) > 0 {
    93  		msg += fmt.Sprintf(`Extra environment values: %v. `, extra)
    94  	}
    95  
    96  	return errors.New(strings.TrimSpace(msg))
    97  }
    98  
    99  var skipEnvs = map[string]struct{}{
   100  	"MINIO_OPTS":          {},
   101  	"MINIO_CERT_PASSWD":   {},
   102  	"MINIO_SERVER_DEBUG":  {},
   103  	"MINIO_DSYNC_TRACE":   {},
   104  	"MINIO_ROOT_USER":     {},
   105  	"MINIO_ROOT_PASSWORD": {},
   106  	"MINIO_ACCESS_KEY":    {},
   107  	"MINIO_SECRET_KEY":    {},
   108  }
   109  
   110  func getServerSystemCfg() *ServerSystemConfig {
   111  	envs := env.List("MINIO_")
   112  	envValues := make(map[string]string, len(envs))
   113  	for _, envK := range envs {
   114  		// skip certain environment variables as part
   115  		// of the whitelist and could be configured
   116  		// differently on each nodes, update skipEnvs()
   117  		// map if there are such environment values
   118  		if _, ok := skipEnvs[envK]; ok {
   119  			continue
   120  		}
   121  		envValues[envK] = logger.HashString(env.Get(envK, ""))
   122  	}
   123  	scfg := &ServerSystemConfig{NEndpoints: globalEndpoints.NEndpoints(), MinioEnv: envValues}
   124  	var cmdLines []string
   125  	for _, ep := range globalEndpoints {
   126  		cmdLines = append(cmdLines, ep.CmdLine)
   127  	}
   128  	scfg.CmdLines = cmdLines
   129  	return scfg
   130  }
   131  
   132  func (s *bootstrapRESTServer) VerifyHandler(params *grid.MSS) (*ServerSystemConfig, *grid.RemoteErr) {
   133  	return getServerSystemCfg(), nil
   134  }
   135  
   136  var serverVerifyHandler = grid.NewSingleHandler[*grid.MSS, *ServerSystemConfig](grid.HandlerServerVerify, grid.NewMSS, func() *ServerSystemConfig { return &ServerSystemConfig{} })
   137  
   138  // registerBootstrapRESTHandlers - register bootstrap rest router.
   139  func registerBootstrapRESTHandlers(gm *grid.Manager) {
   140  	server := &bootstrapRESTServer{}
   141  	logger.FatalIf(serverVerifyHandler.Register(gm, server.VerifyHandler), "unable to register handler")
   142  }
   143  
   144  // client to talk to bootstrap NEndpoints.
   145  type bootstrapRESTClient struct {
   146  	gridConn *grid.Connection
   147  }
   148  
   149  // Verify function verifies the server config.
   150  func (client *bootstrapRESTClient) Verify(ctx context.Context, srcCfg *ServerSystemConfig) (err error) {
   151  	if newObjectLayerFn() != nil {
   152  		return nil
   153  	}
   154  
   155  	recvCfg, err := serverVerifyHandler.Call(ctx, client.gridConn, grid.NewMSS())
   156  	if err != nil {
   157  		return err
   158  	}
   159  	// We do not need the response after returning.
   160  	defer serverVerifyHandler.PutResponse(recvCfg)
   161  
   162  	return srcCfg.Diff(recvCfg)
   163  }
   164  
   165  // Stringer provides a canonicalized representation of node.
   166  func (client *bootstrapRESTClient) String() string {
   167  	return client.gridConn.String()
   168  }
   169  
   170  func verifyServerSystemConfig(ctx context.Context, endpointServerPools EndpointServerPools, gm *grid.Manager) error {
   171  	srcCfg := getServerSystemCfg()
   172  	clnts := newBootstrapRESTClients(endpointServerPools, gm)
   173  	var onlineServers int
   174  	var offlineEndpoints []error
   175  	var incorrectConfigs []error
   176  	var retries int
   177  	var mu sync.Mutex
   178  	for onlineServers < len(clnts)/2 {
   179  		var wg sync.WaitGroup
   180  		wg.Add(len(clnts))
   181  		onlineServers = 0
   182  		for _, clnt := range clnts {
   183  			go func(clnt *bootstrapRESTClient) {
   184  				defer wg.Done()
   185  
   186  				if clnt.gridConn.State() != grid.StateConnected {
   187  					mu.Lock()
   188  					offlineEndpoints = append(offlineEndpoints, fmt.Errorf("%s is unreachable: %w", clnt, grid.ErrDisconnected))
   189  					mu.Unlock()
   190  					return
   191  				}
   192  
   193  				ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
   194  				defer cancel()
   195  
   196  				err := clnt.Verify(ctx, srcCfg)
   197  				mu.Lock()
   198  				if err != nil {
   199  					bootstrapTraceMsg(fmt.Sprintf("clnt.Verify: %v, endpoint: %s", err, clnt))
   200  					if !isNetworkError(err) {
   201  						logger.LogOnceIf(context.Background(), fmt.Errorf("%s has incorrect configuration: %w", clnt, err), "incorrect_"+clnt.String())
   202  						incorrectConfigs = append(incorrectConfigs, fmt.Errorf("%s has incorrect configuration: %w", clnt, err))
   203  					} else {
   204  						offlineEndpoints = append(offlineEndpoints, fmt.Errorf("%s is unreachable: %w", clnt, err))
   205  					}
   206  				} else {
   207  					onlineServers++
   208  				}
   209  				mu.Unlock()
   210  			}(clnt)
   211  		}
   212  		wg.Wait()
   213  
   214  		select {
   215  		case <-ctx.Done():
   216  			return ctx.Err()
   217  		default:
   218  			// Sleep and stagger to avoid blocked CPU and thundering
   219  			// herd upon start up sequence.
   220  			time.Sleep(25*time.Millisecond + time.Duration(rand.Int63n(int64(100*time.Millisecond))))
   221  			retries++
   222  			// after 20 retries start logging that servers are not reachable yet
   223  			if retries >= 20 {
   224  				logger.Info(fmt.Sprintf("Waiting for at least %d remote servers with valid configuration to be online", len(clnts)/2))
   225  				if len(offlineEndpoints) > 0 {
   226  					logger.Info(fmt.Sprintf("Following servers are currently offline or unreachable %s", offlineEndpoints))
   227  				}
   228  				if len(incorrectConfigs) > 0 {
   229  					logger.Info(fmt.Sprintf("Following servers have mismatching configuration %s", incorrectConfigs))
   230  				}
   231  				retries = 0 // reset to log again after 20 retries.
   232  			}
   233  			offlineEndpoints = nil
   234  			incorrectConfigs = nil
   235  		}
   236  	}
   237  	return nil
   238  }
   239  
   240  func newBootstrapRESTClients(endpointServerPools EndpointServerPools, gm *grid.Manager) []*bootstrapRESTClient {
   241  	seenClient := set.NewStringSet()
   242  	var clnts []*bootstrapRESTClient
   243  	for _, ep := range endpointServerPools {
   244  		for _, endpoint := range ep.Endpoints {
   245  			if endpoint.IsLocal {
   246  				continue
   247  			}
   248  			if seenClient.Contains(endpoint.Host) {
   249  				continue
   250  			}
   251  			seenClient.Add(endpoint.Host)
   252  			clnts = append(clnts, &bootstrapRESTClient{gm.Connection(endpoint.GridHost())})
   253  		}
   254  	}
   255  	return clnts
   256  }