github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/endpoint-ellipses.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"errors"
    22  	"fmt"
    23  	"net/url"
    24  	"runtime"
    25  	"sort"
    26  	"strconv"
    27  	"strings"
    28  
    29  	"github.com/cespare/xxhash/v2"
    30  	"github.com/minio/minio-go/v7/pkg/set"
    31  	"github.com/minio/minio/internal/config"
    32  	"github.com/minio/pkg/v2/ellipses"
    33  	"github.com/minio/pkg/v2/env"
    34  )
    35  
    36  // This file implements and supports ellipses pattern for
    37  // `minio server` command line arguments.
    38  
// endpointSet represents parsed ellipses values, and also provides
// methods to get the sets of endpoints.
type endpointSet struct {
	// argPatterns holds one parsed ellipses pattern per input argument.
	argPatterns []ellipses.ArgPattern
	endpoints   []string   // Expanded endpoints; cached by getEndpoints() or preset by the caller.
	setIndexes  [][]uint64 // For each argument, the size of every erasure set carved out of it.
}
    46  
// setSizes lists the supported erasure set sizes; it is used to find
// the optimal single set size. isValidSetSize treats the first and the
// last entries as the inclusive lower and upper bounds.
var setSizes = []uint64{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
    50  
    51  // getDivisibleSize - returns a greatest common divisor of
    52  // all the ellipses sizes.
    53  func getDivisibleSize(totalSizes []uint64) (result uint64) {
    54  	gcd := func(x, y uint64) uint64 {
    55  		for y != 0 {
    56  			x, y = y, x%y
    57  		}
    58  		return x
    59  	}
    60  	result = totalSizes[0]
    61  	for i := 1; i < len(totalSizes); i++ {
    62  		result = gcd(result, totalSizes[i])
    63  	}
    64  	return result
    65  }
    66  
    67  // isValidSetSize - checks whether given count is a valid set size for erasure coding.
    68  var isValidSetSize = func(count uint64) bool {
    69  	return (count >= setSizes[0] && count <= setSizes[len(setSizes)-1])
    70  }
    71  
    72  func commonSetDriveCount(divisibleSize uint64, setCounts []uint64) (setSize uint64) {
    73  	// prefers setCounts to be sorted for optimal behavior.
    74  	if divisibleSize < setCounts[len(setCounts)-1] {
    75  		return divisibleSize
    76  	}
    77  
    78  	// Figure out largest value of total_drives_in_erasure_set which results
    79  	// in least number of total_drives/total_drives_erasure_set ratio.
    80  	prevD := divisibleSize / setCounts[0]
    81  	for _, cnt := range setCounts {
    82  		if divisibleSize%cnt == 0 {
    83  			d := divisibleSize / cnt
    84  			if d <= prevD {
    85  				prevD = d
    86  				setSize = cnt
    87  			}
    88  		}
    89  	}
    90  	return setSize
    91  }
    92  
    93  // possibleSetCountsWithSymmetry returns symmetrical setCounts based on the
    94  // input argument patterns, the symmetry calculation is to ensure that
    95  // we also use uniform number of drives common across all ellipses patterns.
    96  func possibleSetCountsWithSymmetry(setCounts []uint64, argPatterns []ellipses.ArgPattern) []uint64 {
    97  	newSetCounts := make(map[uint64]struct{})
    98  	for _, ss := range setCounts {
    99  		var symmetry bool
   100  		for _, argPattern := range argPatterns {
   101  			for _, p := range argPattern {
   102  				if uint64(len(p.Seq)) > ss {
   103  					symmetry = uint64(len(p.Seq))%ss == 0
   104  				} else {
   105  					symmetry = ss%uint64(len(p.Seq)) == 0
   106  				}
   107  			}
   108  		}
   109  		// With no arg patterns, it is expected that user knows
   110  		// the right symmetry, so either ellipses patterns are
   111  		// provided (recommended) or no ellipses patterns.
   112  		if _, ok := newSetCounts[ss]; !ok && (symmetry || argPatterns == nil) {
   113  			newSetCounts[ss] = struct{}{}
   114  		}
   115  	}
   116  
   117  	setCounts = []uint64{}
   118  	for setCount := range newSetCounts {
   119  		setCounts = append(setCounts, setCount)
   120  	}
   121  
   122  	// Not necessarily needed but it ensures to the readers
   123  	// eyes that we prefer a sorted setCount slice for the
   124  	// subsequent function to figure out the right common
   125  	// divisor, it avoids loops.
   126  	sort.Slice(setCounts, func(i, j int) bool {
   127  		return setCounts[i] < setCounts[j]
   128  	})
   129  
   130  	return setCounts
   131  }
   132  
   133  // getSetIndexes returns list of indexes which provides the set size
   134  // on each index, this function also determines the final set size
   135  // The final set size has the affinity towards choosing smaller
   136  // indexes (total sets)
   137  func getSetIndexes(args []string, totalSizes []uint64, customSetDriveCount uint64, argPatterns []ellipses.ArgPattern) (setIndexes [][]uint64, err error) {
   138  	if len(totalSizes) == 0 || len(args) == 0 {
   139  		return nil, errInvalidArgument
   140  	}
   141  
   142  	setIndexes = make([][]uint64, len(totalSizes))
   143  	for _, totalSize := range totalSizes {
   144  		// Check if totalSize has minimum range upto setSize
   145  		if totalSize < setSizes[0] || totalSize < customSetDriveCount {
   146  			msg := fmt.Sprintf("Incorrect number of endpoints provided %s", args)
   147  			return nil, config.ErrInvalidNumberOfErasureEndpoints(nil).Msg(msg)
   148  		}
   149  	}
   150  
   151  	commonSize := getDivisibleSize(totalSizes)
   152  	possibleSetCounts := func(setSize uint64) (ss []uint64) {
   153  		for _, s := range setSizes {
   154  			if setSize%s == 0 {
   155  				ss = append(ss, s)
   156  			}
   157  		}
   158  		return ss
   159  	}
   160  
   161  	setCounts := possibleSetCounts(commonSize)
   162  	if len(setCounts) == 0 {
   163  		msg := fmt.Sprintf("Incorrect number of endpoints provided %s, number of drives %d is not divisible by any supported erasure set sizes %d", args, commonSize, setSizes)
   164  		return nil, config.ErrInvalidNumberOfErasureEndpoints(nil).Msg(msg)
   165  	}
   166  
   167  	var setSize uint64
   168  	// Custom set drive count allows to override automatic distribution.
   169  	// only meant if you want to further optimize drive distribution.
   170  	if customSetDriveCount > 0 {
   171  		msg := fmt.Sprintf("Invalid set drive count. Acceptable values for %d number drives are %d", commonSize, setCounts)
   172  		var found bool
   173  		for _, ss := range setCounts {
   174  			if ss == customSetDriveCount {
   175  				found = true
   176  			}
   177  		}
   178  		if !found {
   179  			return nil, config.ErrInvalidErasureSetSize(nil).Msg(msg)
   180  		}
   181  
   182  		// No automatic symmetry calculation expected, user is on their own
   183  		setSize = customSetDriveCount
   184  		globalCustomErasureDriveCount = true
   185  	} else {
   186  		// Returns possible set counts with symmetry.
   187  		setCounts = possibleSetCountsWithSymmetry(setCounts, argPatterns)
   188  
   189  		if len(setCounts) == 0 {
   190  			msg := fmt.Sprintf("No symmetric distribution detected with input endpoints provided %s, drives %d cannot be spread symmetrically by any supported erasure set sizes %d", args, commonSize, setSizes)
   191  			return nil, config.ErrInvalidNumberOfErasureEndpoints(nil).Msg(msg)
   192  		}
   193  
   194  		// Final set size with all the symmetry accounted for.
   195  		setSize = commonSetDriveCount(commonSize, setCounts)
   196  	}
   197  
   198  	// Check whether setSize is with the supported range.
   199  	if !isValidSetSize(setSize) {
   200  		msg := fmt.Sprintf("Incorrect number of endpoints provided %s, number of drives %d is not divisible by any supported erasure set sizes %d", args, commonSize, setSizes)
   201  		return nil, config.ErrInvalidNumberOfErasureEndpoints(nil).Msg(msg)
   202  	}
   203  
   204  	for i := range totalSizes {
   205  		for j := uint64(0); j < totalSizes[i]/setSize; j++ {
   206  			setIndexes[i] = append(setIndexes[i], setSize)
   207  		}
   208  	}
   209  
   210  	return setIndexes, nil
   211  }
   212  
   213  // Returns all the expanded endpoints, each argument is expanded separately.
   214  func (s *endpointSet) getEndpoints() (endpoints []string) {
   215  	if len(s.endpoints) != 0 {
   216  		return s.endpoints
   217  	}
   218  	for _, argPattern := range s.argPatterns {
   219  		for _, lbls := range argPattern.Expand() {
   220  			endpoints = append(endpoints, strings.Join(lbls, ""))
   221  		}
   222  	}
   223  	s.endpoints = endpoints
   224  	return endpoints
   225  }
   226  
   227  // Get returns the sets representation of the endpoints
   228  // this function also intelligently decides on what will
   229  // be the right set size etc.
   230  func (s endpointSet) Get() (sets [][]string) {
   231  	k := uint64(0)
   232  	endpoints := s.getEndpoints()
   233  	for i := range s.setIndexes {
   234  		for j := range s.setIndexes[i] {
   235  			sets = append(sets, endpoints[k:s.setIndexes[i][j]+k])
   236  			k = s.setIndexes[i][j] + k
   237  		}
   238  	}
   239  
   240  	return sets
   241  }
   242  
   243  // Return the total size for each argument patterns.
   244  func getTotalSizes(argPatterns []ellipses.ArgPattern) []uint64 {
   245  	var totalSizes []uint64
   246  	for _, argPattern := range argPatterns {
   247  		var totalSize uint64 = 1
   248  		for _, p := range argPattern {
   249  			totalSize *= uint64(len(p.Seq))
   250  		}
   251  		totalSizes = append(totalSizes, totalSize)
   252  	}
   253  	return totalSizes
   254  }
   255  
   256  // Parses all arguments and returns an endpointSet which is a collection
   257  // of endpoints following the ellipses pattern, this is what is used
   258  // by the object layer for initializing itself.
   259  func parseEndpointSet(customSetDriveCount uint64, args ...string) (ep endpointSet, err error) {
   260  	argPatterns := make([]ellipses.ArgPattern, len(args))
   261  	for i, arg := range args {
   262  		patterns, perr := ellipses.FindEllipsesPatterns(arg)
   263  		if perr != nil {
   264  			return endpointSet{}, config.ErrInvalidErasureEndpoints(nil).Msg(perr.Error())
   265  		}
   266  		argPatterns[i] = patterns
   267  	}
   268  
   269  	ep.setIndexes, err = getSetIndexes(args, getTotalSizes(argPatterns), customSetDriveCount, argPatterns)
   270  	if err != nil {
   271  		return endpointSet{}, config.ErrInvalidErasureEndpoints(nil).Msg(err.Error())
   272  	}
   273  
   274  	ep.argPatterns = argPatterns
   275  
   276  	return ep, nil
   277  }
   278  
   279  // GetAllSets - parses all ellipses input arguments, expands them into
   280  // corresponding list of endpoints chunked evenly in accordance with a
   281  // specific set size.
   282  // For example: {1...64} is divided into 4 sets each of size 16.
   283  // This applies to even distributed setup syntax as well.
   284  func GetAllSets(args ...string) ([][]string, error) {
   285  	var customSetDriveCount uint64
   286  	if v := env.Get(EnvErasureSetDriveCount, ""); v != "" {
   287  		driveCount, err := strconv.Atoi(v)
   288  		if err != nil {
   289  			return nil, config.ErrInvalidErasureSetSize(err)
   290  		}
   291  		customSetDriveCount = uint64(driveCount)
   292  	}
   293  
   294  	var setArgs [][]string
   295  	if !ellipses.HasEllipses(args...) {
   296  		var setIndexes [][]uint64
   297  		// Check if we have more one args.
   298  		if len(args) > 1 {
   299  			var err error
   300  			setIndexes, err = getSetIndexes(args, []uint64{uint64(len(args))}, customSetDriveCount, nil)
   301  			if err != nil {
   302  				return nil, err
   303  			}
   304  		} else {
   305  			// We are in FS setup, proceed forward.
   306  			setIndexes = [][]uint64{{uint64(len(args))}}
   307  		}
   308  		s := endpointSet{
   309  			endpoints:  args,
   310  			setIndexes: setIndexes,
   311  		}
   312  		setArgs = s.Get()
   313  	} else {
   314  		s, err := parseEndpointSet(customSetDriveCount, args...)
   315  		if err != nil {
   316  			return nil, err
   317  		}
   318  		setArgs = s.Get()
   319  	}
   320  
   321  	uniqueArgs := set.NewStringSet()
   322  	for _, sargs := range setArgs {
   323  		for _, arg := range sargs {
   324  			if uniqueArgs.Contains(arg) {
   325  				return nil, config.ErrInvalidErasureEndpoints(nil).Msg(fmt.Sprintf("Input args (%s) has duplicate ellipses", args))
   326  			}
   327  			uniqueArgs.Add(arg)
   328  		}
   329  	}
   330  
   331  	return setArgs, nil
   332  }
   333  
// EnvErasureSetDriveCount is the name of the environment variable that
// overrides the set drive count for manual distribution. GetAllSets reads
// it and passes its integer value on as the custom set drive count.
const (
	EnvErasureSetDriveCount = "MINIO_ERASURE_SET_DRIVE_COUNT"
)
   338  
// globalCustomErasureDriveCount is set to true by getSetIndexes once a
// custom (non-zero) set drive count has been accepted as the set size.
var globalCustomErasureDriveCount = false
   340  
   341  type node struct {
   342  	nodeName string
   343  	disks    []string
   344  }
   345  
   346  type endpointsList []node
   347  
   348  func (el *endpointsList) add(arg string) error {
   349  	u, err := url.Parse(arg)
   350  	if err != nil {
   351  		return err
   352  	}
   353  	found := false
   354  	list := *el
   355  	for i := range list {
   356  		if list[i].nodeName == u.Host {
   357  			list[i].disks = append(list[i].disks, u.String())
   358  			found = true
   359  			break
   360  		}
   361  	}
   362  	if !found {
   363  		list = append(list, node{nodeName: u.Host, disks: []string{u.String()}})
   364  	}
   365  	*el = list
   366  	return nil
   367  }
   368  
   369  // buildDisksLayoutFromConfFile supports with and without ellipses transparently.
   370  func buildDisksLayoutFromConfFile(pools [][]string) (layout disksLayout, err error) {
   371  	if len(pools) == 0 {
   372  		return layout, errInvalidArgument
   373  	}
   374  
   375  	for _, list := range pools {
   376  		var endpointsList endpointsList
   377  
   378  		for _, arg := range list {
   379  			switch {
   380  			case ellipses.HasList(arg):
   381  				patterns, err := ellipses.FindListPatterns(arg)
   382  				if err != nil {
   383  					return layout, err
   384  				}
   385  				for _, exp := range patterns.Expand() {
   386  					for _, ep := range exp {
   387  						if err := endpointsList.add(ep); err != nil {
   388  							return layout, err
   389  						}
   390  					}
   391  				}
   392  			case ellipses.HasEllipses(arg):
   393  				patterns, err := ellipses.FindEllipsesPatterns(arg)
   394  				if err != nil {
   395  					return layout, err
   396  				}
   397  				for _, exp := range patterns.Expand() {
   398  					if err := endpointsList.add(strings.Join(exp, "")); err != nil {
   399  						return layout, err
   400  					}
   401  				}
   402  			default:
   403  				if err := endpointsList.add(arg); err != nil {
   404  					return layout, err
   405  				}
   406  			}
   407  		}
   408  
   409  		var stopping bool
   410  		var singleNode bool
   411  		var eps []string
   412  
   413  		for i := 0; ; i++ {
   414  			for _, node := range endpointsList {
   415  				if node.nodeName == "" {
   416  					singleNode = true
   417  				}
   418  
   419  				if len(node.disks) <= i {
   420  					stopping = true
   421  					continue
   422  				}
   423  				if stopping {
   424  					return layout, errors.New("number of disks per node does not match")
   425  				}
   426  				eps = append(eps, node.disks[i])
   427  			}
   428  			if stopping {
   429  				break
   430  			}
   431  		}
   432  
   433  		for _, node := range endpointsList {
   434  			if node.nodeName != "" && singleNode {
   435  				return layout, errors.New("all arguments must but either single node or distributed")
   436  			}
   437  		}
   438  
   439  		setArgs, err := GetAllSets(eps...)
   440  		if err != nil {
   441  			return layout, err
   442  		}
   443  
   444  		h := xxhash.New()
   445  		for _, s := range setArgs {
   446  			for _, d := range s {
   447  				h.WriteString(d)
   448  			}
   449  		}
   450  
   451  		layout.pools = append(layout.pools, poolDisksLayout{
   452  			cmdline: fmt.Sprintf("hash:%x", h.Sum(nil)),
   453  			layout:  setArgs,
   454  		})
   455  	}
   456  	return
   457  }
   458  
   459  // mergeDisksLayoutFromArgs supports with and without ellipses transparently.
   460  func mergeDisksLayoutFromArgs(args []string, ctxt *serverCtxt) (err error) {
   461  	if len(args) == 0 {
   462  		return errInvalidArgument
   463  	}
   464  
   465  	ok := true
   466  	for _, arg := range args {
   467  		ok = ok && !ellipses.HasEllipses(arg)
   468  	}
   469  
   470  	var setArgs [][]string
   471  
   472  	// None of the args have ellipses use the old style.
   473  	if ok {
   474  		setArgs, err = GetAllSets(args...)
   475  		if err != nil {
   476  			return err
   477  		}
   478  		ctxt.Layout = disksLayout{
   479  			legacy: true,
   480  			pools:  []poolDisksLayout{{layout: setArgs}},
   481  		}
   482  		return
   483  	}
   484  
   485  	for _, arg := range args {
   486  		if !ellipses.HasEllipses(arg) && len(args) > 1 {
   487  			// TODO: support SNSD deployments to be decommissioned in future
   488  			return fmt.Errorf("all args must have ellipses for pool expansion (%w) args: %s", errInvalidArgument, args)
   489  		}
   490  		setArgs, err = GetAllSets(arg)
   491  		if err != nil {
   492  			return err
   493  		}
   494  		ctxt.Layout.pools = append(ctxt.Layout.pools, poolDisksLayout{cmdline: arg, layout: setArgs})
   495  	}
   496  	return
   497  }
   498  
   499  // CreateServerEndpoints - validates and creates new endpoints from input args, supports
   500  // both ellipses and without ellipses transparently.
   501  func createServerEndpoints(serverAddr string, poolArgs []poolDisksLayout, legacy bool) (
   502  	endpointServerPools EndpointServerPools, setupType SetupType, err error,
   503  ) {
   504  	if len(poolArgs) == 0 {
   505  		return nil, -1, errInvalidArgument
   506  	}
   507  
   508  	poolEndpoints, setupType, err := CreatePoolEndpoints(serverAddr, poolArgs...)
   509  	if err != nil {
   510  		return nil, -1, err
   511  	}
   512  
   513  	for i, endpointList := range poolEndpoints {
   514  		if err = endpointServerPools.Add(PoolEndpoints{
   515  			Legacy:       legacy,
   516  			SetCount:     len(poolArgs[i].layout),
   517  			DrivesPerSet: len(poolArgs[i].layout[0]),
   518  			Endpoints:    endpointList,
   519  			Platform:     fmt.Sprintf("OS: %s | Arch: %s", runtime.GOOS, runtime.GOARCH),
   520  			CmdLine:      poolArgs[i].cmdline,
   521  		}); err != nil {
   522  			return nil, -1, err
   523  		}
   524  	}
   525  
   526  	return endpointServerPools, setupType, nil
   527  }