github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/peer-s3-client.go (about)

     1  // Copyright (c) 2015-2023 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"sort"
    25  	"strconv"
    26  	"sync/atomic"
    27  	"time"
    28  
    29  	"github.com/minio/madmin-go/v3"
    30  	"github.com/minio/minio/internal/grid"
    31  	"github.com/minio/minio/internal/logger"
    32  	"github.com/minio/pkg/v2/sync/errgroup"
    33  	"golang.org/x/exp/slices"
    34  )
    35  
// errPeerOffline is the error recorded for a peer whose client is nil when
// S3PeerSys fans an operation out to all peers.
var errPeerOffline = errors.New("peer is offline")
    37  
// peerS3Client is the set of bucket-level operations S3PeerSys issues against
// a single node. This file provides two implementations: localPeerS3Client
// and remotePeerS3Client.
type peerS3Client interface {
	// ListBuckets returns the buckets reported by this peer.
	ListBuckets(ctx context.Context, opts BucketOptions) ([]BucketInfo, error)
	// HealBucket heals the given bucket on this peer.
	HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error)
	// GetBucketInfo returns this peer's info for the given bucket.
	GetBucketInfo(ctx context.Context, bucket string, opts BucketOptions) (BucketInfo, error)
	// MakeBucket creates the given bucket on this peer.
	MakeBucket(ctx context.Context, bucket string, opts MakeBucketOptions) error
	// DeleteBucket deletes the given bucket on this peer.
	DeleteBucket(ctx context.Context, bucket string, opts DeleteBucketOptions) error

	// GetHost returns the host of the node behind this client.
	GetHost() string
	// SetPools records the pool indexes associated with this peer.
	SetPools([]int)
	// GetPools returns the pool indexes recorded with SetPools.
	GetPools() []int
}
    49  
// localPeerS3Client is the peerS3Client created for a node flagged IsLocal.
// Its bucket operations call the corresponding *Local helpers directly.
type localPeerS3Client struct {
	// node is the node this client represents.
	node  Node
	// pools holds the pool indexes recorded via SetPools.
	pools []int
}
    54  
    55  func (l *localPeerS3Client) GetHost() string {
    56  	return l.node.Host
    57  }
    58  
    59  func (l *localPeerS3Client) SetPools(p []int) {
    60  	l.pools = make([]int, len(p))
    61  	copy(l.pools, p)
    62  }
    63  
    64  func (l localPeerS3Client) GetPools() []int {
    65  	return l.pools
    66  }
    67  
    68  func (l localPeerS3Client) ListBuckets(ctx context.Context, opts BucketOptions) ([]BucketInfo, error) {
    69  	return listBucketsLocal(ctx, opts)
    70  }
    71  
    72  func (l localPeerS3Client) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
    73  	return healBucketLocal(ctx, bucket, opts)
    74  }
    75  
    76  func (l localPeerS3Client) GetBucketInfo(ctx context.Context, bucket string, opts BucketOptions) (BucketInfo, error) {
    77  	return getBucketInfoLocal(ctx, bucket, opts)
    78  }
    79  
    80  func (l localPeerS3Client) MakeBucket(ctx context.Context, bucket string, opts MakeBucketOptions) error {
    81  	return makeBucketLocal(ctx, bucket, opts)
    82  }
    83  
    84  func (l localPeerS3Client) DeleteBucket(ctx context.Context, bucket string, opts DeleteBucketOptions) error {
    85  	return deleteBucketLocal(ctx, bucket, opts)
    86  }
    87  
// remotePeerS3Client is the peerS3Client used to talk to peer nodes that are
// not local. Its bucket operations are issued as RPCs over the grid
// connection returned by gridConn.
type remotePeerS3Client struct {
	// node is the peer node this client talks to.
	node  Node
	// pools holds the pool indexes recorded via SetPools.
	pools []int

	// Function that returns the grid connection for this peer when initialized.
	// Will return nil if the grid connection is not initialized yet.
	gridConn func() *grid.Connection
}
    97  
// S3PeerSys - S3 peer call system. It fans bucket operations out to every
// peer client and reduces the per-peer errors pool by pool.
type S3PeerSys struct {
	peerClients []peerS3Client // One client per node from newPeerS3Clients; local nodes are included as localPeerS3Client
	// poolsCount is the number of server pools (len of the endpoints passed to NewS3PeerSys).
	poolsCount  int
}
   103  
   104  // NewS3PeerSys - creates new S3 peer calls.
   105  func NewS3PeerSys(endpoints EndpointServerPools) *S3PeerSys {
   106  	return &S3PeerSys{
   107  		peerClients: newPeerS3Clients(endpoints),
   108  		poolsCount:  len(endpoints),
   109  	}
   110  }
   111  
   112  // HealBucket - heals buckets at node level
   113  func (sys *S3PeerSys) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
   114  	g := errgroup.WithNErrs(len(sys.peerClients))
   115  
   116  	for idx, client := range sys.peerClients {
   117  		idx := idx
   118  		client := client
   119  		g.Go(func() error {
   120  			if client == nil {
   121  				return errPeerOffline
   122  			}
   123  			_, err := client.GetBucketInfo(ctx, bucket, BucketOptions{})
   124  			return err
   125  		}, idx)
   126  	}
   127  
   128  	errs := g.Wait()
   129  
   130  	var poolErrs []error
   131  	for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ {
   132  		perPoolErrs := make([]error, 0, len(sys.peerClients))
   133  		for i, client := range sys.peerClients {
   134  			if slices.Contains(client.GetPools(), poolIdx) {
   135  				perPoolErrs = append(perPoolErrs, errs[i])
   136  			}
   137  		}
   138  		quorum := len(perPoolErrs) / 2
   139  		poolErrs = append(poolErrs, reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, quorum))
   140  	}
   141  
   142  	opts.Remove = isAllBucketsNotFound(poolErrs)
   143  	opts.Recreate = !opts.Remove
   144  
   145  	g = errgroup.WithNErrs(len(sys.peerClients))
   146  	healBucketResults := make([]madmin.HealResultItem, len(sys.peerClients))
   147  	for idx, client := range sys.peerClients {
   148  		idx := idx
   149  		client := client
   150  		g.Go(func() error {
   151  			if client == nil {
   152  				return errPeerOffline
   153  			}
   154  			res, err := client.HealBucket(ctx, bucket, opts)
   155  			if err != nil {
   156  				return err
   157  			}
   158  			healBucketResults[idx] = res
   159  			return nil
   160  		}, idx)
   161  	}
   162  
   163  	errs = g.Wait()
   164  
   165  	for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ {
   166  		perPoolErrs := make([]error, 0, len(sys.peerClients))
   167  		for i, client := range sys.peerClients {
   168  			if slices.Contains(client.GetPools(), poolIdx) {
   169  				perPoolErrs = append(perPoolErrs, errs[i])
   170  			}
   171  		}
   172  		quorum := len(perPoolErrs) / 2
   173  		if poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, quorum); poolErr != nil {
   174  			return madmin.HealResultItem{}, poolErr
   175  		}
   176  	}
   177  
   178  	for i, err := range errs {
   179  		if err == nil {
   180  			return healBucketResults[i], nil
   181  		}
   182  	}
   183  
   184  	return madmin.HealResultItem{}, toObjectErr(errVolumeNotFound, bucket)
   185  }
   186  
   187  // ListBuckets lists buckets across all nodes and returns a consistent view:
   188  //   - Return an error when a pool cannot return N/2+1 valid bucket information
   189  //   - For each pool, check if the bucket exists in N/2+1 nodes before including it in the final result
   190  func (sys *S3PeerSys) ListBuckets(ctx context.Context, opts BucketOptions) ([]BucketInfo, error) {
   191  	g := errgroup.WithNErrs(len(sys.peerClients))
   192  
   193  	nodeBuckets := make([][]BucketInfo, len(sys.peerClients))
   194  
   195  	for idx, client := range sys.peerClients {
   196  		idx := idx
   197  		client := client
   198  		g.Go(func() error {
   199  			if client == nil {
   200  				return errPeerOffline
   201  			}
   202  			localBuckets, err := client.ListBuckets(ctx, opts)
   203  			if err != nil {
   204  				return err
   205  			}
   206  			nodeBuckets[idx] = localBuckets
   207  			return nil
   208  		}, idx)
   209  	}
   210  
   211  	errs := g.Wait()
   212  
   213  	// The list of buckets in a map to avoid duplication
   214  	resultMap := make(map[string]BucketInfo)
   215  
   216  	for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ {
   217  		perPoolErrs := make([]error, 0, len(sys.peerClients))
   218  		for i, client := range sys.peerClients {
   219  			if slices.Contains(client.GetPools(), poolIdx) {
   220  				perPoolErrs = append(perPoolErrs, errs[i])
   221  			}
   222  		}
   223  		quorum := len(perPoolErrs) / 2
   224  		if poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, quorum); poolErr != nil {
   225  			return nil, poolErr
   226  		}
   227  
   228  		bucketsMap := make(map[string]int)
   229  		for idx, buckets := range nodeBuckets {
   230  			if buckets == nil {
   231  				continue
   232  			}
   233  			if !slices.Contains(sys.peerClients[idx].GetPools(), poolIdx) {
   234  				continue
   235  			}
   236  			for _, bi := range buckets {
   237  				_, ok := resultMap[bi.Name]
   238  				if ok {
   239  					// Skip it, this bucket is found in another pool
   240  					continue
   241  				}
   242  				bucketsMap[bi.Name]++
   243  				if bucketsMap[bi.Name] >= quorum {
   244  					resultMap[bi.Name] = bi
   245  				}
   246  			}
   247  		}
   248  		// loop through buckets and see if some with lost quorum
   249  		// these could be stale buckets lying around, queue a heal
   250  		// of such a bucket. This is needed here as we identify such
   251  		// buckets here while listing buckets. As part of regular
   252  		// globalBucketMetadataSys.Init() call would get a valid
   253  		// buckets only and not the quourum lost ones like this, so
   254  		// explicit call
   255  		for bktName, count := range bucketsMap {
   256  			if count < quorum {
   257  				// Queue a bucket heal task
   258  				globalMRFState.addPartialOp(partialOperation{
   259  					bucket: bktName,
   260  					queued: time.Now(),
   261  				})
   262  			}
   263  		}
   264  	}
   265  
   266  	result := make([]BucketInfo, 0, len(resultMap))
   267  	for _, bi := range resultMap {
   268  		result = append(result, bi)
   269  	}
   270  
   271  	sort.Slice(result, func(i, j int) bool {
   272  		return result[i].Name < result[j].Name
   273  	})
   274  
   275  	return result, nil
   276  }
   277  
   278  // GetBucketInfo returns bucket stat info about bucket on disk across all peers
   279  func (sys *S3PeerSys) GetBucketInfo(ctx context.Context, bucket string, opts BucketOptions) (binfo BucketInfo, err error) {
   280  	g := errgroup.WithNErrs(len(sys.peerClients))
   281  
   282  	bucketInfos := make([]BucketInfo, len(sys.peerClients))
   283  	for idx, client := range sys.peerClients {
   284  		idx := idx
   285  		client := client
   286  		g.Go(func() error {
   287  			if client == nil {
   288  				return errPeerOffline
   289  			}
   290  			bucketInfo, err := client.GetBucketInfo(ctx, bucket, opts)
   291  			if err != nil {
   292  				return err
   293  			}
   294  			bucketInfos[idx] = bucketInfo
   295  			return nil
   296  		}, idx)
   297  	}
   298  
   299  	errs := g.Wait()
   300  
   301  	for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ {
   302  		perPoolErrs := make([]error, 0, len(sys.peerClients))
   303  		for i, client := range sys.peerClients {
   304  			if slices.Contains(client.GetPools(), poolIdx) {
   305  				perPoolErrs = append(perPoolErrs, errs[i])
   306  			}
   307  		}
   308  		quorum := len(perPoolErrs) / 2
   309  		if poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, quorum); poolErr != nil {
   310  			return BucketInfo{}, poolErr
   311  		}
   312  	}
   313  
   314  	for i, err := range errs {
   315  		if err == nil {
   316  			return bucketInfos[i], nil
   317  		}
   318  	}
   319  
   320  	return BucketInfo{}, toObjectErr(errVolumeNotFound, bucket)
   321  }
   322  
   323  func (client *remotePeerS3Client) ListBuckets(ctx context.Context, opts BucketOptions) ([]BucketInfo, error) {
   324  	bi, err := listBucketsRPC.Call(ctx, client.gridConn(), &opts)
   325  	if err != nil {
   326  		return nil, toStorageErr(err)
   327  	}
   328  	buckets := make([]BucketInfo, 0, len(bi.Value()))
   329  	for _, b := range bi.Value() {
   330  		if b != nil {
   331  			buckets = append(buckets, *b)
   332  		}
   333  	}
   334  	bi.Recycle() // BucketInfo has no internal pointers, so it's safe to recycle.
   335  	return buckets, nil
   336  }
   337  
   338  func (client *remotePeerS3Client) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
   339  	conn := client.gridConn()
   340  	if conn == nil {
   341  		return madmin.HealResultItem{}, nil
   342  	}
   343  
   344  	mss := grid.NewMSSWith(map[string]string{
   345  		peerS3Bucket:        bucket,
   346  		peerS3BucketDeleted: strconv.FormatBool(opts.Remove),
   347  	})
   348  
   349  	_, err := healBucketRPC.Call(ctx, conn, mss)
   350  
   351  	// Initialize heal result info
   352  	return madmin.HealResultItem{
   353  		Type:     madmin.HealItemBucket,
   354  		Bucket:   bucket,
   355  		SetCount: -1, // explicitly set an invalid value -1, for bucket heal scenario
   356  	}, toStorageErr(err)
   357  }
   358  
   359  // GetBucketInfo returns bucket stat info from a peer
   360  func (client *remotePeerS3Client) GetBucketInfo(ctx context.Context, bucket string, opts BucketOptions) (BucketInfo, error) {
   361  	conn := client.gridConn()
   362  	if conn == nil {
   363  		return BucketInfo{}, nil
   364  	}
   365  
   366  	mss := grid.NewMSSWith(map[string]string{
   367  		peerS3Bucket:        bucket,
   368  		peerS3BucketDeleted: strconv.FormatBool(opts.Deleted),
   369  	})
   370  
   371  	volInfo, err := headBucketRPC.Call(ctx, conn, mss)
   372  	if err != nil {
   373  		return BucketInfo{}, toStorageErr(err)
   374  	}
   375  
   376  	return BucketInfo{
   377  		Name:    volInfo.Name,
   378  		Created: volInfo.Created,
   379  	}, nil
   380  }
   381  
   382  // MakeBucket creates bucket across all peers
   383  func (sys *S3PeerSys) MakeBucket(ctx context.Context, bucket string, opts MakeBucketOptions) error {
   384  	g := errgroup.WithNErrs(len(sys.peerClients))
   385  	for idx, client := range sys.peerClients {
   386  		client := client
   387  		g.Go(func() error {
   388  			if client == nil {
   389  				return errPeerOffline
   390  			}
   391  			return client.MakeBucket(ctx, bucket, opts)
   392  		}, idx)
   393  	}
   394  	errs := g.Wait()
   395  
   396  	for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ {
   397  		perPoolErrs := make([]error, 0, len(sys.peerClients))
   398  		for i, client := range sys.peerClients {
   399  			if slices.Contains(client.GetPools(), poolIdx) {
   400  				perPoolErrs = append(perPoolErrs, errs[i])
   401  			}
   402  		}
   403  		if poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, len(perPoolErrs)/2+1); poolErr != nil {
   404  			return toObjectErr(poolErr, bucket)
   405  		}
   406  	}
   407  	return nil
   408  }
   409  
   410  // MakeBucket creates a bucket on a peer
   411  func (client *remotePeerS3Client) MakeBucket(ctx context.Context, bucket string, opts MakeBucketOptions) error {
   412  	conn := client.gridConn()
   413  	if conn == nil {
   414  		return nil
   415  	}
   416  
   417  	mss := grid.NewMSSWith(map[string]string{
   418  		peerS3Bucket:            bucket,
   419  		peerS3BucketForceCreate: strconv.FormatBool(opts.ForceCreate),
   420  	})
   421  
   422  	_, err := makeBucketRPC.Call(ctx, conn, mss)
   423  	return toStorageErr(err)
   424  }
   425  
   426  // DeleteBucket deletes bucket across all peers
   427  func (sys *S3PeerSys) DeleteBucket(ctx context.Context, bucket string, opts DeleteBucketOptions) error {
   428  	g := errgroup.WithNErrs(len(sys.peerClients))
   429  	for idx, client := range sys.peerClients {
   430  		client := client
   431  		g.Go(func() error {
   432  			if client == nil {
   433  				return errPeerOffline
   434  			}
   435  			return client.DeleteBucket(ctx, bucket, opts)
   436  		}, idx)
   437  	}
   438  	errs := g.Wait()
   439  
   440  	for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ {
   441  		perPoolErrs := make([]error, 0, len(sys.peerClients))
   442  		for i, client := range sys.peerClients {
   443  			if slices.Contains(client.GetPools(), poolIdx) {
   444  				perPoolErrs = append(perPoolErrs, errs[i])
   445  			}
   446  		}
   447  		poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, len(perPoolErrs)/2+1)
   448  		if poolErr != nil && !errors.Is(poolErr, errVolumeNotFound) {
   449  			if !opts.NoRecreate {
   450  				// re-create successful deletes, since we are return an error.
   451  				sys.MakeBucket(ctx, bucket, MakeBucketOptions{})
   452  			}
   453  			return toObjectErr(poolErr, bucket)
   454  		}
   455  	}
   456  	return nil
   457  }
   458  
   459  // DeleteBucket deletes bucket on a peer
   460  func (client *remotePeerS3Client) DeleteBucket(ctx context.Context, bucket string, opts DeleteBucketOptions) error {
   461  	conn := client.gridConn()
   462  	if conn == nil {
   463  		return nil
   464  	}
   465  
   466  	mss := grid.NewMSSWith(map[string]string{
   467  		peerS3Bucket:            bucket,
   468  		peerS3BucketForceDelete: strconv.FormatBool(opts.Force),
   469  	})
   470  
   471  	_, err := deleteBucketRPC.Call(ctx, conn, mss)
   472  	return toStorageErr(err)
   473  }
   474  
   475  func (client remotePeerS3Client) GetHost() string {
   476  	return client.node.Host
   477  }
   478  
   479  func (client remotePeerS3Client) GetPools() []int {
   480  	return client.pools
   481  }
   482  
   483  func (client *remotePeerS3Client) SetPools(p []int) {
   484  	client.pools = make([]int, len(p))
   485  	copy(client.pools, p)
   486  }
   487  
   488  // newPeerS3Clients creates new peer clients.
   489  func newPeerS3Clients(endpoints EndpointServerPools) (peers []peerS3Client) {
   490  	nodes := endpoints.GetNodes()
   491  	peers = make([]peerS3Client, len(nodes))
   492  	for i, node := range nodes {
   493  		if node.IsLocal {
   494  			peers[i] = &localPeerS3Client{node: node}
   495  		} else {
   496  			peers[i] = newPeerS3Client(node)
   497  		}
   498  		peers[i].SetPools(node.Pools)
   499  	}
   500  
   501  	return peers
   502  }
   503  
   504  // Returns a peer S3 client.
   505  func newPeerS3Client(node Node) peerS3Client {
   506  	var gridConn atomic.Pointer[grid.Connection]
   507  
   508  	return &remotePeerS3Client{
   509  		node: node,
   510  		gridConn: func() *grid.Connection {
   511  			// Lazy initialization of grid connection.
   512  			// When we create this peer client, the grid connection is likely not yet initialized.
   513  			if node.GridHost == "" {
   514  				logger.LogOnceIf(context.Background(), fmt.Errorf("gridHost is empty for peer %s", node.Host), node.Host+":gridHost")
   515  				return nil
   516  			}
   517  			gc := gridConn.Load()
   518  			if gc != nil {
   519  				return gc
   520  			}
   521  			gm := globalGrid.Load()
   522  			if gm == nil {
   523  				return nil
   524  			}
   525  			gc = gm.Connection(node.GridHost)
   526  			if gc == nil {
   527  				logger.LogOnceIf(context.Background(), fmt.Errorf("gridHost %s not found for peer %s", node.GridHost, node.Host), node.Host+":gridHost")
   528  				return nil
   529  			}
   530  			gridConn.Store(gc)
   531  			return gc
   532  		},
   533  	}
   534  }