github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/peer-s3-client.go (about) 1 // Copyright (c) 2015-2023 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package cmd 19 20 import ( 21 "context" 22 "errors" 23 "fmt" 24 "sort" 25 "strconv" 26 "sync/atomic" 27 "time" 28 29 "github.com/minio/madmin-go/v3" 30 "github.com/minio/minio/internal/grid" 31 "github.com/minio/minio/internal/logger" 32 "github.com/minio/pkg/v2/sync/errgroup" 33 "golang.org/x/exp/slices" 34 ) 35 36 var errPeerOffline = errors.New("peer is offline") 37 38 type peerS3Client interface { 39 ListBuckets(ctx context.Context, opts BucketOptions) ([]BucketInfo, error) 40 HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) 41 GetBucketInfo(ctx context.Context, bucket string, opts BucketOptions) (BucketInfo, error) 42 MakeBucket(ctx context.Context, bucket string, opts MakeBucketOptions) error 43 DeleteBucket(ctx context.Context, bucket string, opts DeleteBucketOptions) error 44 45 GetHost() string 46 SetPools([]int) 47 GetPools() []int 48 } 49 50 type localPeerS3Client struct { 51 node Node 52 pools []int 53 } 54 55 func (l *localPeerS3Client) GetHost() string { 56 return l.node.Host 57 } 58 59 func (l *localPeerS3Client) SetPools(p []int) { 60 l.pools = make([]int, len(p)) 61 copy(l.pools, p) 62 } 63 64 func (l localPeerS3Client) GetPools() []int { 65 return l.pools 66 } 67 68 func (l localPeerS3Client) ListBuckets(ctx context.Context, opts BucketOptions) ([]BucketInfo, error) { 69 return listBucketsLocal(ctx, opts) 70 } 71 72 func (l localPeerS3Client) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) { 73 return healBucketLocal(ctx, bucket, opts) 74 } 75 76 func (l localPeerS3Client) GetBucketInfo(ctx context.Context, bucket string, opts BucketOptions) (BucketInfo, error) { 77 return getBucketInfoLocal(ctx, bucket, opts) 78 } 79 80 func (l localPeerS3Client) MakeBucket(ctx context.Context, bucket string, opts MakeBucketOptions) error { 81 return makeBucketLocal(ctx, bucket, opts) 82 } 83 84 func (l localPeerS3Client) DeleteBucket(ctx context.Context, bucket string, opts DeleteBucketOptions) error { 85 return deleteBucketLocal(ctx, bucket, opts) 86 } 87 88 // client to talk to peer Nodes. 89 type remotePeerS3Client struct { 90 node Node 91 pools []int 92 93 // Function that returns the grid connection for this peer when initialized. 94 // Will return nil if the grid connection is not initialized yet. 95 gridConn func() *grid.Connection 96 } 97 98 // S3PeerSys - S3 peer call system. 99 type S3PeerSys struct { 100 peerClients []peerS3Client // Excludes self 101 poolsCount int 102 } 103 104 // NewS3PeerSys - creates new S3 peer calls. 105 func NewS3PeerSys(endpoints EndpointServerPools) *S3PeerSys { 106 return &S3PeerSys{ 107 peerClients: newPeerS3Clients(endpoints), 108 poolsCount: len(endpoints), 109 } 110 } 111 112 // HealBucket - heals buckets at node level 113 func (sys *S3PeerSys) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) { 114 g := errgroup.WithNErrs(len(sys.peerClients)) 115 116 for idx, client := range sys.peerClients { 117 idx := idx 118 client := client 119 g.Go(func() error { 120 if client == nil { 121 return errPeerOffline 122 } 123 _, err := client.GetBucketInfo(ctx, bucket, BucketOptions{}) 124 return err 125 }, idx) 126 } 127 128 errs := g.Wait() 129 130 var poolErrs []error 131 for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ { 132 perPoolErrs := make([]error, 0, len(sys.peerClients)) 133 for i, client := range sys.peerClients { 134 if slices.Contains(client.GetPools(), poolIdx) { 135 perPoolErrs = append(perPoolErrs, errs[i]) 136 } 137 } 138 quorum := len(perPoolErrs) / 2 139 poolErrs = append(poolErrs, reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, quorum)) 140 } 141 142 opts.Remove = isAllBucketsNotFound(poolErrs) 143 opts.Recreate = !opts.Remove 144 145 g = errgroup.WithNErrs(len(sys.peerClients)) 146 healBucketResults := make([]madmin.HealResultItem, len(sys.peerClients)) 147 for idx, client := range sys.peerClients { 148 idx := idx 149 client := client 150 g.Go(func() error { 151 if client == nil { 152 return errPeerOffline 153 } 154 res, err := client.HealBucket(ctx, bucket, opts) 155 if err != nil { 156 return err 157 } 158 healBucketResults[idx] = res 159 return nil 160 }, idx) 161 } 162 163 errs = g.Wait() 164 165 for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ { 166 perPoolErrs := make([]error, 0, len(sys.peerClients)) 167 for i, client := range sys.peerClients { 168 if slices.Contains(client.GetPools(), poolIdx) { 169 perPoolErrs = append(perPoolErrs, errs[i]) 170 } 171 } 172 quorum := len(perPoolErrs) / 2 173 if poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, quorum); poolErr != nil { 174 return madmin.HealResultItem{}, poolErr 175 } 176 } 177 178 for i, err := range errs { 179 if err == nil { 180 return healBucketResults[i], nil 181 } 182 } 183 184 return madmin.HealResultItem{}, toObjectErr(errVolumeNotFound, bucket) 185 } 186 187 // ListBuckets lists buckets across all nodes and returns a consistent view: 188 // - Return an error when a pool cannot return N/2+1 valid bucket information 189 // - For each pool, check if the bucket exists in N/2+1 nodes before including it in the final result 190 func (sys *S3PeerSys) ListBuckets(ctx context.Context, opts BucketOptions) ([]BucketInfo, error) { 191 g := errgroup.WithNErrs(len(sys.peerClients)) 192 193 nodeBuckets := make([][]BucketInfo, len(sys.peerClients)) 194 195 for idx, client := range sys.peerClients { 196 idx := idx 197 client := client 198 g.Go(func() error { 199 if client == nil { 200 return errPeerOffline 201 } 202 localBuckets, err := client.ListBuckets(ctx, opts) 203 if err != nil { 204 return err 205 } 206 nodeBuckets[idx] = localBuckets 207 return nil 208 }, idx) 209 } 210 211 errs := g.Wait() 212 213 // The list of buckets in a map to avoid duplication 214 resultMap := make(map[string]BucketInfo) 215 216 for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ { 217 perPoolErrs := make([]error, 0, len(sys.peerClients)) 218 for i, client := range sys.peerClients { 219 if slices.Contains(client.GetPools(), poolIdx) { 220 perPoolErrs = append(perPoolErrs, errs[i]) 221 } 222 } 223 quorum := len(perPoolErrs) / 2 224 if poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, quorum); poolErr != nil { 225 return nil, poolErr 226 } 227 228 bucketsMap := make(map[string]int) 229 for idx, buckets := range nodeBuckets { 230 if buckets == nil { 231 continue 232 } 233 if !slices.Contains(sys.peerClients[idx].GetPools(), poolIdx) { 234 continue 235 } 236 for _, bi := range buckets { 237 _, ok := resultMap[bi.Name] 238 if ok { 239 // Skip it, this bucket is found in another pool 240 continue 241 } 242 bucketsMap[bi.Name]++ 243 if bucketsMap[bi.Name] >= quorum { 244 resultMap[bi.Name] = bi 245 } 246 } 247 } 248 // loop through buckets and see if some with lost quorum 249 // these could be stale buckets lying around, queue a heal 250 // of such a bucket. This is needed here as we identify such 251 // buckets here while listing buckets. As part of regular 252 // globalBucketMetadataSys.Init() call would get a valid 253 // buckets only and not the quourum lost ones like this, so 254 // explicit call 255 for bktName, count := range bucketsMap { 256 if count < quorum { 257 // Queue a bucket heal task 258 globalMRFState.addPartialOp(partialOperation{ 259 bucket: bktName, 260 queued: time.Now(), 261 }) 262 } 263 } 264 } 265 266 result := make([]BucketInfo, 0, len(resultMap)) 267 for _, bi := range resultMap { 268 result = append(result, bi) 269 } 270 271 sort.Slice(result, func(i, j int) bool { 272 return result[i].Name < result[j].Name 273 }) 274 275 return result, nil 276 } 277 278 // GetBucketInfo returns bucket stat info about bucket on disk across all peers 279 func (sys *S3PeerSys) GetBucketInfo(ctx context.Context, bucket string, opts BucketOptions) (binfo BucketInfo, err error) { 280 g := errgroup.WithNErrs(len(sys.peerClients)) 281 282 bucketInfos := make([]BucketInfo, len(sys.peerClients)) 283 for idx, client := range sys.peerClients { 284 idx := idx 285 client := client 286 g.Go(func() error { 287 if client == nil { 288 return errPeerOffline 289 } 290 bucketInfo, err := client.GetBucketInfo(ctx, bucket, opts) 291 if err != nil { 292 return err 293 } 294 bucketInfos[idx] = bucketInfo 295 return nil 296 }, idx) 297 } 298 299 errs := g.Wait() 300 301 for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ { 302 perPoolErrs := make([]error, 0, len(sys.peerClients)) 303 for i, client := range sys.peerClients { 304 if slices.Contains(client.GetPools(), poolIdx) { 305 perPoolErrs = append(perPoolErrs, errs[i]) 306 } 307 } 308 quorum := len(perPoolErrs) / 2 309 if poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, quorum); poolErr != nil { 310 return BucketInfo{}, poolErr 311 } 312 } 313 314 for i, err := range errs { 315 if err == nil { 316 return bucketInfos[i], nil 317 } 318 } 319 320 return BucketInfo{}, toObjectErr(errVolumeNotFound, bucket) 321 } 322 323 func (client *remotePeerS3Client) ListBuckets(ctx context.Context, opts BucketOptions) ([]BucketInfo, error) { 324 bi, err := listBucketsRPC.Call(ctx, client.gridConn(), &opts) 325 if err != nil { 326 return nil, toStorageErr(err) 327 } 328 buckets := make([]BucketInfo, 0, len(bi.Value())) 329 for _, b := range bi.Value() { 330 if b != nil { 331 buckets = append(buckets, *b) 332 } 333 } 334 bi.Recycle() // BucketInfo has no internal pointers, so it's safe to recycle. 335 return buckets, nil 336 } 337 338 func (client *remotePeerS3Client) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) { 339 conn := client.gridConn() 340 if conn == nil { 341 return madmin.HealResultItem{}, nil 342 } 343 344 mss := grid.NewMSSWith(map[string]string{ 345 peerS3Bucket: bucket, 346 peerS3BucketDeleted: strconv.FormatBool(opts.Remove), 347 }) 348 349 _, err := healBucketRPC.Call(ctx, conn, mss) 350 351 // Initialize heal result info 352 return madmin.HealResultItem{ 353 Type: madmin.HealItemBucket, 354 Bucket: bucket, 355 SetCount: -1, // explicitly set an invalid value -1, for bucket heal scenario 356 }, toStorageErr(err) 357 } 358 359 // GetBucketInfo returns bucket stat info from a peer 360 func (client *remotePeerS3Client) GetBucketInfo(ctx context.Context, bucket string, opts BucketOptions) (BucketInfo, error) { 361 conn := client.gridConn() 362 if conn == nil { 363 return BucketInfo{}, nil 364 } 365 366 mss := grid.NewMSSWith(map[string]string{ 367 peerS3Bucket: bucket, 368 peerS3BucketDeleted: strconv.FormatBool(opts.Deleted), 369 }) 370 371 volInfo, err := headBucketRPC.Call(ctx, conn, mss) 372 if err != nil { 373 return BucketInfo{}, toStorageErr(err) 374 } 375 376 return BucketInfo{ 377 Name: volInfo.Name, 378 Created: volInfo.Created, 379 }, nil 380 } 381 382 // MakeBucket creates bucket across all peers 383 func (sys *S3PeerSys) MakeBucket(ctx context.Context, bucket string, opts MakeBucketOptions) error { 384 g := errgroup.WithNErrs(len(sys.peerClients)) 385 for idx, client := range sys.peerClients { 386 client := client 387 g.Go(func() error { 388 if client == nil { 389 return errPeerOffline 390 } 391 return client.MakeBucket(ctx, bucket, opts) 392 }, idx) 393 } 394 errs := g.Wait() 395 396 for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ { 397 perPoolErrs := make([]error, 0, len(sys.peerClients)) 398 for i, client := range sys.peerClients { 399 if slices.Contains(client.GetPools(), poolIdx) { 400 perPoolErrs = append(perPoolErrs, errs[i]) 401 } 402 } 403 if poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, len(perPoolErrs)/2+1); poolErr != nil { 404 return toObjectErr(poolErr, bucket) 405 } 406 } 407 return nil 408 } 409 410 // MakeBucket creates a bucket on a peer 411 func (client *remotePeerS3Client) MakeBucket(ctx context.Context, bucket string, opts MakeBucketOptions) error { 412 conn := client.gridConn() 413 if conn == nil { 414 return nil 415 } 416 417 mss := grid.NewMSSWith(map[string]string{ 418 peerS3Bucket: bucket, 419 peerS3BucketForceCreate: strconv.FormatBool(opts.ForceCreate), 420 }) 421 422 _, err := makeBucketRPC.Call(ctx, conn, mss) 423 return toStorageErr(err) 424 } 425 426 // DeleteBucket deletes bucket across all peers 427 func (sys *S3PeerSys) DeleteBucket(ctx context.Context, bucket string, opts DeleteBucketOptions) error { 428 g := errgroup.WithNErrs(len(sys.peerClients)) 429 for idx, client := range sys.peerClients { 430 client := client 431 g.Go(func() error { 432 if client == nil { 433 return errPeerOffline 434 } 435 return client.DeleteBucket(ctx, bucket, opts) 436 }, idx) 437 } 438 errs := g.Wait() 439 440 for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ { 441 perPoolErrs := make([]error, 0, len(sys.peerClients)) 442 for i, client := range sys.peerClients { 443 if slices.Contains(client.GetPools(), poolIdx) { 444 perPoolErrs = append(perPoolErrs, errs[i]) 445 } 446 } 447 poolErr := reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, len(perPoolErrs)/2+1) 448 if poolErr != nil && !errors.Is(poolErr, errVolumeNotFound) { 449 if !opts.NoRecreate { 450 // re-create successful deletes, since we are return an error. 451 sys.MakeBucket(ctx, bucket, MakeBucketOptions{}) 452 } 453 return toObjectErr(poolErr, bucket) 454 } 455 } 456 return nil 457 } 458 459 // DeleteBucket deletes bucket on a peer 460 func (client *remotePeerS3Client) DeleteBucket(ctx context.Context, bucket string, opts DeleteBucketOptions) error { 461 conn := client.gridConn() 462 if conn == nil { 463 return nil 464 } 465 466 mss := grid.NewMSSWith(map[string]string{ 467 peerS3Bucket: bucket, 468 peerS3BucketForceDelete: strconv.FormatBool(opts.Force), 469 }) 470 471 _, err := deleteBucketRPC.Call(ctx, conn, mss) 472 return toStorageErr(err) 473 } 474 475 func (client remotePeerS3Client) GetHost() string { 476 return client.node.Host 477 } 478 479 func (client remotePeerS3Client) GetPools() []int { 480 return client.pools 481 } 482 483 func (client *remotePeerS3Client) SetPools(p []int) { 484 client.pools = make([]int, len(p)) 485 copy(client.pools, p) 486 } 487 488 // newPeerS3Clients creates new peer clients. 489 func newPeerS3Clients(endpoints EndpointServerPools) (peers []peerS3Client) { 490 nodes := endpoints.GetNodes() 491 peers = make([]peerS3Client, len(nodes)) 492 for i, node := range nodes { 493 if node.IsLocal { 494 peers[i] = &localPeerS3Client{node: node} 495 } else { 496 peers[i] = newPeerS3Client(node) 497 } 498 peers[i].SetPools(node.Pools) 499 } 500 501 return peers 502 } 503 504 // Returns a peer S3 client. 505 func newPeerS3Client(node Node) peerS3Client { 506 var gridConn atomic.Pointer[grid.Connection] 507 508 return &remotePeerS3Client{ 509 node: node, 510 gridConn: func() *grid.Connection { 511 // Lazy initialization of grid connection. 512 // When we create this peer client, the grid connection is likely not yet initialized. 513 if node.GridHost == "" { 514 logger.LogOnceIf(context.Background(), fmt.Errorf("gridHost is empty for peer %s", node.Host), node.Host+":gridHost") 515 return nil 516 } 517 gc := gridConn.Load() 518 if gc != nil { 519 return gc 520 } 521 gm := globalGrid.Load() 522 if gm == nil { 523 return nil 524 } 525 gc = gm.Connection(node.GridHost) 526 if gc == nil { 527 logger.LogOnceIf(context.Background(), fmt.Errorf("gridHost %s not found for peer %s", node.GridHost, node.Host), node.Host+":gridHost") 528 return nil 529 } 530 gridConn.Store(gc) 531 return gc 532 }, 533 } 534 }