storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/prepare-storage.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2016 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"context"
    21  	"crypto/tls"
    22  	"errors"
    23  	"fmt"
    24  	"net/http"
    25  	"net/url"
    26  	"os"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/dustin/go-humanize"
    31  
    32  	xhttp "storj.io/minio/cmd/http"
    33  	"storj.io/minio/cmd/logger"
    34  	"storj.io/minio/pkg/sync/errgroup"
    35  )
    36  
    37  var printEndpointError = func() func(Endpoint, error, bool) {
    38  	var mutex sync.Mutex
    39  	printOnce := make(map[Endpoint]map[string]int)
    40  
    41  	return func(endpoint Endpoint, err error, once bool) {
    42  		reqInfo := (&logger.ReqInfo{}).AppendTags("endpoint", endpoint.String())
    43  		ctx := logger.SetReqInfo(GlobalContext, reqInfo)
    44  		mutex.Lock()
    45  		defer mutex.Unlock()
    46  
    47  		m, ok := printOnce[endpoint]
    48  		if !ok {
    49  			m = make(map[string]int)
    50  			m[err.Error()]++
    51  			printOnce[endpoint] = m
    52  			if once {
    53  				logger.LogAlwaysIf(ctx, err)
    54  				return
    55  			}
    56  		}
    57  		// Once is set and we are here means error was already
    58  		// printed once.
    59  		if once {
    60  			return
    61  		}
    62  		// once not set, check if same error occurred 3 times in
    63  		// a row, then make sure we print it to call attention.
    64  		if m[err.Error()] > 2 {
    65  			logger.LogAlwaysIf(ctx, fmt.Errorf("Following error has been printed %d times.. %w", m[err.Error()], err))
    66  			// Reduce the count to introduce further delay in printing
    67  			// but let it again print after the 2th attempt
    68  			m[err.Error()]--
    69  			m[err.Error()]--
    70  		}
    71  		m[err.Error()]++
    72  	}
    73  }()
    74  
    75  // Migrates backend format of local disks.
    76  func formatErasureMigrateLocalEndpoints(endpoints Endpoints) error {
    77  	g := errgroup.WithNErrs(len(endpoints))
    78  	for index, endpoint := range endpoints {
    79  		if !endpoint.IsLocal {
    80  			continue
    81  		}
    82  		index := index
    83  		g.Go(func() error {
    84  			epPath := endpoints[index].Path
    85  			err := formatErasureMigrate(epPath)
    86  			if err != nil && !errors.Is(err, os.ErrNotExist) {
    87  				return err
    88  			}
    89  			return nil
    90  		}, index)
    91  	}
    92  	for _, err := range g.Wait() {
    93  		if err != nil {
    94  			return err
    95  		}
    96  	}
    97  	return nil
    98  }
    99  
   100  // Cleans up tmp directory of local disks.
   101  func formatErasureCleanupTmpLocalEndpoints(endpoints Endpoints) error {
   102  	g := errgroup.WithNErrs(len(endpoints))
   103  	for index, endpoint := range endpoints {
   104  		if !endpoint.IsLocal {
   105  			continue
   106  		}
   107  		index := index
   108  		g.Go(func() error {
   109  			epPath := endpoints[index].Path
   110  			// Need to move temporary objects left behind from previous run of minio
   111  			// server to a unique directory under `minioMetaTmpBucket-old` to clean
   112  			// up `minioMetaTmpBucket` for the current run.
   113  			//
   114  			// /disk1/.minio.sys/tmp-old/
   115  			//  |__ 33a58b40-aecc-4c9f-a22f-ff17bfa33b62
   116  			//  |__ e870a2c1-d09c-450c-a69c-6eaa54a89b3e
   117  			//
   118  			// In this example, `33a58b40-aecc-4c9f-a22f-ff17bfa33b62` directory contains
   119  			// temporary objects from one of the previous runs of minio server.
   120  			tmpOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID())
   121  			if err := renameAll(pathJoin(epPath, minioMetaTmpBucket),
   122  				tmpOld); err != nil && err != errFileNotFound {
   123  				return fmt.Errorf("unable to rename (%s -> %s) %w",
   124  					pathJoin(epPath, minioMetaTmpBucket),
   125  					tmpOld,
   126  					osErrToFileErr(err))
   127  			}
   128  
   129  			// Renames and schedules for puring all bucket metacache.
   130  			renameAllBucketMetacache(epPath)
   131  
   132  			// Removal of tmp-old folder is backgrounded completely.
   133  			go removeAll(pathJoin(epPath, minioMetaTmpBucket+"-old"))
   134  
   135  			if err := mkdirAll(pathJoin(epPath, minioMetaTmpBucket), 0777); err != nil {
   136  				return fmt.Errorf("unable to create (%s) %w",
   137  					pathJoin(epPath, minioMetaTmpBucket),
   138  					err)
   139  			}
   140  			return nil
   141  		}, index)
   142  	}
   143  	for _, err := range g.Wait() {
   144  		if err != nil {
   145  			return err
   146  		}
   147  	}
   148  	return nil
   149  }
   150  
   151  // Following error message is added to fix a regression in release
   152  // RELEASE.2018-03-16T22-52-12Z after migrating v1 to v2 to v3. This
   153  // migration failed to capture '.This' field properly which indicates
   154  // the disk UUID association. Below error message is returned when
   155  // we see this situation in format.json, for more info refer
   156  // https://github.com/minio/minio/issues/5667
   157  var errErasureV3ThisEmpty = fmt.Errorf("Erasure format version 3 has This field empty")
   158  
   159  // isServerResolvable - checks if the endpoint is resolvable
   160  // by sending a naked HTTP request with liveness checks.
   161  func isServerResolvable(endpoint Endpoint, timeout time.Duration) error {
   162  	serverURL := &url.URL{
   163  		Scheme: endpoint.Scheme,
   164  		Host:   endpoint.Host,
   165  		Path:   pathJoin(healthCheckPathPrefix, healthCheckLivenessPath),
   166  	}
   167  
   168  	var tlsConfig *tls.Config
   169  	if GlobalIsTLS {
   170  		tlsConfig = &tls.Config{
   171  			RootCAs: globalRootCAs,
   172  		}
   173  	}
   174  
   175  	httpClient := &http.Client{
   176  		Transport:
   177  		// For more details about various values used here refer
   178  		// https://golang.org/pkg/net/http/#Transport documentation
   179  		&http.Transport{
   180  			Proxy:                 http.ProxyFromEnvironment,
   181  			DialContext:           xhttp.NewCustomDialContext(3 * time.Second),
   182  			ResponseHeaderTimeout: 3 * time.Second,
   183  			TLSHandshakeTimeout:   3 * time.Second,
   184  			ExpectContinueTimeout: 3 * time.Second,
   185  			TLSClientConfig:       tlsConfig,
   186  			// Go net/http automatically unzip if content-type is
   187  			// gzip disable this feature, as we are always interested
   188  			// in raw stream.
   189  			DisableCompression: true,
   190  		},
   191  	}
   192  	defer httpClient.CloseIdleConnections()
   193  
   194  	ctx, cancel := context.WithTimeout(GlobalContext, timeout)
   195  
   196  	req, err := http.NewRequestWithContext(ctx, http.MethodGet, serverURL.String(), nil)
   197  	if err != nil {
   198  		cancel()
   199  		return err
   200  	}
   201  
   202  	resp, err := httpClient.Do(req)
   203  	cancel()
   204  	if err != nil {
   205  		return err
   206  	}
   207  	xhttp.DrainBody(resp.Body)
   208  
   209  	return nil
   210  }
   211  
// connectLoadInitFormats connects to the list of endpoints, loads the Erasure
// `format.json` from all disks, validates that the formats are correct and
// resolves the format that is in quorum. If the disks need to be initialized
// (see shouldInitErasureDisks) and firstDisk is set, all of them are
// formatted for the first time.
//
// retryCount is the number of attempts already made by the caller; connection
// and `format.json` read problems are only logged once it reaches 5.
//
// On success the connected storage disks and the reference format are
// returned. On any error the returned disks and format are nil and the disks
// opened in this attempt are closed by the deferred cleanup. The sentinel
// errors errNotFirstDisk, errFirstDiskWait and errErasureV3ThisEmpty are
// returned directly from this function.
func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID, distributionAlgo string) (storageDisks []StorageAPI, format *formatErasureV3, err error) {
	// Initialize all storage disks
	storageDisks, errs := initStorageDisksWithErrors(endpoints)

	// Close the disks opened above whenever this attempt fails. The slice is
	// passed as an argument because the error paths return nil for the
	// storageDisks result; err is the named result and is read at return time.
	defer func(storageDisks []StorageAPI) {
		if err != nil {
			closeStorageDisks(storageDisks)
		}
	}(storageDisks)

	// Note: err in this loop shadows the named result; the return statement
	// below still assigns the named result, so the deferred close runs.
	for i, err := range errs {
		if err != nil {
			// Anything other than a missing disk is fatal for this attempt.
			if err != errDiskNotFound {
				return nil, nil, fmt.Errorf("Disk %s: %w", endpoints[i], err)
			}
			// After several retries, report the result of a liveness probe
			// against the endpoint that could not be connected.
			if retryCount >= 5 {
				logger.Info("Unable to connect to %s: %v\n", endpoints[i], isServerResolvable(endpoints[i], time.Second))
			}
		}
	}

	// Attempt to load all `format.json` from all disks.
	formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false)
	// After several retries, report load errors that are neither
	// "unformatted disk" nor "disk not found".
	for i, sErr := range sErrs {
		// print the error, nonetheless, which is perhaps unhandled
		if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 {
			if sErr != nil {
				logger.Info("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr)
			}
		}
	}

	// Pre-emptively check if one of the formatted disks
	// is invalid. This function returns success for the
	// most part unless one of the formats is not consistent
	// with expected Erasure format. For example if a user is
	// trying to pool FS backend into an Erasure set.
	if err = checkFormatErasureValues(formatConfigs, storageDisks, setDriveCount); err != nil {
		return nil, nil, err
	}

	// All disks report unformatted, we should initialize every one of them.
	if shouldInitErasureDisks(sErrs) && firstDisk {
		logger.Info("Formatting %s pool, %v set(s), %v drives per set.",
			humanize.Ordinal(poolCount), setCount, setDriveCount)

		// Initialize erasure code format on disks
		format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, deploymentID, distributionAlgo, sErrs)
		if err != nil {
			return nil, nil, err
		}

		// Assign globalDeploymentID on first run for the
		// minio server managing the first disk
		globalDeploymentID = format.ID
		return storageDisks, format, nil
	}

	// Return error when quorum unformatted disks - indicating we are
	// waiting for first server to be online.
	if quorumUnformattedDisks(sErrs) && !firstDisk {
		return nil, nil, errNotFirstDisk
	}

	// Return error when quorum unformatted disks but waiting for rest
	// of the servers to be online.
	if quorumUnformattedDisks(sErrs) && firstDisk {
		return nil, nil, errFirstDiskWait
	}

	// Mark all root disks down
	markRootDisksAsDown(storageDisks, sErrs)

	// Following function is added to fix a regression which was introduced
	// in release RELEASE.2018-03-16T22-52-12Z after migrating v1 to v2 to v3.
	// This migration failed to capture '.This' field properly which indicates
	// the disk UUID association. Below function is called to handle and fix
	// this regression, for more info refer https://github.com/minio/minio/issues/5667
	if err = fixFormatErasureV3(storageDisks, endpoints, formatConfigs); err != nil {
		return nil, nil, err
	}

	// If any of the .This field is still empty, we return error.
	if formatErasureV3ThisEmpty(formatConfigs) {
		return nil, nil, errErasureV3ThisEmpty
	}

	// Pick the reference format out of the loaded formats.
	format, err = getFormatErasureInQuorum(formatConfigs)
	if err != nil {
		return nil, nil, err
	}

	// An empty format ID means the deployment ID has not been fixed yet.
	if format.ID == "" {
		// Not a first disk, wait until first disk fixes deploymentID
		if !firstDisk {
			return nil, nil, errNotFirstDisk
		}
		if err = formatErasureFixDeploymentID(endpoints, storageDisks, format); err != nil {
			return nil, nil, err
		}
	}

	// Publish the deployment ID of the reference format process-wide.
	globalDeploymentID = format.ID

	if err = formatErasureFixLocalDeploymentID(endpoints, storageDisks, format); err != nil {
		return nil, nil, err
	}

	// This will always recreate some directories inside .minio.sys of
	// the local disk such as tmp, multipart and background-ops
	initErasureMetaVolumesInLocalDisks(storageDisks, formatConfigs)

	return storageDisks, format, nil
}
   330  
   331  // Format disks before initialization of object layer.
   332  func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID, distributionAlgo string) ([]StorageAPI, *formatErasureV3, error) {
   333  	if len(endpoints) == 0 || setCount == 0 || setDriveCount == 0 {
   334  		return nil, nil, errInvalidArgument
   335  	}
   336  
   337  	if err := formatErasureMigrateLocalEndpoints(endpoints); err != nil {
   338  		return nil, nil, err
   339  	}
   340  
   341  	if err := formatErasureCleanupTmpLocalEndpoints(endpoints); err != nil {
   342  		return nil, nil, err
   343  	}
   344  
   345  	// prepare getElapsedTime() to calculate elapsed time since we started trying formatting disks.
   346  	// All times are rounded to avoid showing milli, micro and nano seconds
   347  	formatStartTime := time.Now().Round(time.Second)
   348  	getElapsedTime := func() string {
   349  		return time.Now().Round(time.Second).Sub(formatStartTime).String()
   350  	}
   351  
   352  	// Wait on each try for an update.
   353  	ticker := time.NewTicker(500 * time.Millisecond)
   354  	defer ticker.Stop()
   355  	var tries int
   356  	for {
   357  		select {
   358  		case <-ticker.C:
   359  			storageDisks, format, err := connectLoadInitFormats(tries, firstDisk, endpoints, poolCount, setCount, setDriveCount, deploymentID, distributionAlgo)
   360  			if err != nil {
   361  				tries++
   362  				switch err {
   363  				case errNotFirstDisk:
   364  					// Fresh setup, wait for first server to be up.
   365  					logger.Info("Waiting for the first server to format the disks.")
   366  					continue
   367  				case errFirstDiskWait:
   368  					// Fresh setup, wait for other servers to come up.
   369  					logger.Info("Waiting for all other servers to be online to format the disks.")
   370  					continue
   371  				case errErasureReadQuorum:
   372  					// no quorum available continue to wait for minimum number of servers.
   373  					logger.Info("Waiting for a minimum of %d disks to come online (elapsed %s)\n", len(endpoints)/2, getElapsedTime())
   374  					continue
   375  				case errErasureV3ThisEmpty:
   376  					// need to wait for this error to be healed, so continue.
   377  					continue
   378  				default:
   379  					// For all other unhandled errors we exit and fail.
   380  					return nil, nil, err
   381  				}
   382  			}
   383  			return storageDisks, format, nil
   384  		case <-globalOSSignalCh:
   385  			return nil, nil, fmt.Errorf("Initializing data volumes gracefully stopped")
   386  		}
   387  	}
   388  }