github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/metacache-server-pool.go

// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	pathutil "path"
	"strings"
	"sync"
	"time"

	xioutil "github.com/minio/minio/internal/ioutil"
	"github.com/minio/minio/internal/logger"
)

func renameAllBucketMetacache(epPath string) error {
	// Rename all previous `.minio.sys/buckets/<bucketname>/.metacache`
	// to `.minio.sys/tmp/` for deletion.
	return readDirFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error {
		if typ == os.ModeDir {
			tmpMetacacheOld := pathutil.Join(epPath, minioMetaTmpDeletedBucket, mustGetUUID())
			if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)),
				tmpMetacacheOld, epPath); err != nil && err != errFileNotFound {
				return fmt.Errorf("unable to rename (%s -> %s) %w",
					pathJoin(epPath, minioMetaBucket+metacachePrefixForID(minioMetaBucket, slashSeparator)),
					tmpMetacacheOld,
					osErrToFileErr(err))
			}
		}
		return nil
	})
}

// listPath will return the requested entries.
// If no more entries are in the listing io.EOF is returned,
// otherwise nil or an unexpected error is returned.
// The listPathOptions given will be checked and modified internally.
// Required important fields are Bucket, Prefix, Separator.
// Other important fields are Limit, Marker.
// The list ID is always derived from the Marker.
func (z *erasureServerPools) listPath(ctx context.Context, o *listPathOptions) (entries metaCacheEntriesSorted, err error) {
	if err := checkListObjsArgs(ctx, o.Bucket, o.Prefix, o.Marker); err != nil {
		return entries, err
	}

	// Marker points to before the prefix, just ignore it.
	if o.Marker < o.Prefix {
		o.Marker = ""
	}

	// Marker is set, validate the pre-condition.
	if o.Marker != "" && o.Prefix != "" {
		// A marker that does not share the prefix is not implemented. Send an empty response.
		if !HasPrefix(o.Marker, o.Prefix) {
			return entries, io.EOF
		}
	}

	// With max keys of zero we have reached eof, return right here.
	if o.Limit == 0 {
		return entries, io.EOF
	}

	// For delimiter and prefix as '/' we do not list anything at all
	// along with the prefix. On a flat namespace with 'prefix'
	// as '/' we don't have any entries, since all the keys are
	// of form 'keyName/...'
	if strings.HasPrefix(o.Prefix, SlashSeparator) {
		return entries, io.EOF
	}

	// If delimiter is slashSeparator we must return directories of
	// the non-recursive scan unless explicitly requested.
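	// Directory entries are only included for a '/' separator; an empty
	// or custom separator results in a recursive scan.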
	o.IncludeDirectories = o.Separator == slashSeparator
	if (o.Separator == slashSeparator || o.Separator == "") && !o.Recursive {
		o.Recursive = o.Separator != slashSeparator
		o.Separator = slashSeparator
	} else {
		// Default is recursive, if delimiter is set then list non recursive.
		o.Recursive = true
	}

	// Decode and get the optional list id from the marker.
	o.parseMarker()
	if o.BaseDir == "" {
		o.BaseDir = baseDirFromPrefix(o.Prefix)
	}
	o.Transient = o.Transient || isReservedOrInvalidBucket(o.Bucket, false)
	o.SetFilter()
	if o.Transient {
		o.Create = false
	}

	// We have 3 cases:
	// 1) Cold listing, just list.
	// 2) Returning, but with no id. Start async listing.
	// 3) Returning, with ID, stream from list.
	//
	// If we don't have a list id we must ask the server if it has a cache or create a new one.
	if o.ID != "" && !o.Transient {
		// Create or ping with handout...
		rpc := globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix))
		var c *metacache
		if rpc == nil {
			resp := localMetacacheMgr.getBucket(ctx, o.Bucket).findCache(*o)
			c = &resp
		} else {
			rctx, cancel := context.WithTimeout(ctx, 5*time.Second)
			c, err = rpc.GetMetacacheListing(rctx, *o)
			cancel()
		}
		if err != nil {
			if errors.Is(err, context.Canceled) {
				// Context is canceled, return at once; no entries to return.
				return entries, io.EOF
			}
			if !errors.Is(err, context.DeadlineExceeded) {
				// Report error once per bucket, but continue listing.
				logger.LogOnceIf(ctx, err, "GetMetacacheListing:"+o.Bucket)
			}
			o.Transient = true
			o.Create = false
			o.ID = mustGetUUID()
		} else {
			if c.fileNotFound {
				// No cache found, no entries found.
				return entries, io.EOF
			}
			if c.status == scanStateError || c.status == scanStateNone {
				o.ID = ""
				o.Create = false
				o.debugln("scan status", c.status, " - waiting a roundtrip to create")
			} else {
				// Continue listing
				o.ID = c.id
				go func(meta metacache) {
					// Continuously update while we wait.
					t := time.NewTicker(metacacheMaxClientWait / 10)
					defer t.Stop()
					select {
					case <-ctx.Done():
						// Request is done, stop updating.
						return
					case <-t.C:
						meta.lastHandout = time.Now()
						meta, _ = rpc.UpdateMetacacheListing(ctx, meta)
					}
				}(*c)
			}
		}
	}

	if o.ID != "" && !o.Transient {
		// We have an existing list ID, continue streaming.
		if o.Create {
			o.debugln("Creating", o)
			entries, err = z.listAndSave(ctx, o)
			if err == nil || err == io.EOF {
				return entries, err
			}
			entries.truncate(0)
		} else {
			if o.pool < len(z.serverPools) && o.set < len(z.serverPools[o.pool].sets) {
				o.debugln("Resuming", o)
				entries, err = z.serverPools[o.pool].sets[o.set].streamMetadataParts(ctx, *o)
				entries.reuse = true // We read from stream and are not sharing results.
				if err == nil {
					return entries, nil
				}
			} else {
				err = fmt.Errorf("invalid pool/set")
				o.pool, o.set = 0, 0
			}
		}
		if IsErr(err, []error{
			nil,
			context.Canceled,
			context.DeadlineExceeded,
			// io.EOF is expected and should be returned but no need to log it.
			io.EOF,
		}...) {
			// Expected good errors we don't need to return error.
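			// io.EOF is passed through to signal that the listing is complete.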
			return entries, err
		}
		entries.truncate(0)
		go func() {
			rpc := globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix))
			if rpc != nil {
				ctx, cancel := context.WithTimeout(GlobalContext, 5*time.Second)
				defer cancel()
				c, err := rpc.GetMetacacheListing(ctx, *o)
				if err == nil {
					c.error = "no longer used"
					c.status = scanStateError
					rpc.UpdateMetacacheListing(ctx, *c)
				}
			}
		}()
		o.ID = ""
	}

	// Do listing in-place.
	// Create output for our results.
	// Create filter for results.
	o.debugln("Raw List", o)
	filterCh := make(chan metaCacheEntry, o.Limit)
	listCtx, cancelList := context.WithCancel(ctx)
	filteredResults := o.gatherResults(listCtx, filterCh)
	var wg sync.WaitGroup
	wg.Add(1)
	var listErr error

	go func(o listPathOptions) {
		defer wg.Done()
		o.StopDiskAtLimit = true
		listErr = z.listMerged(listCtx, o, filterCh)
		o.debugln("listMerged returned with", listErr)
	}(*o)

	entries, err = filteredResults()
	cancelList()
	wg.Wait()
	if listErr != nil && !errors.Is(listErr, context.Canceled) {
		return entries, listErr
	}
	entries.reuse = true
	truncated := entries.len() > o.Limit || err == nil
	entries.truncate(o.Limit)
	if !o.Transient && truncated {
		if o.ID == "" {
			entries.listID = mustGetUUID()
		} else {
			entries.listID = o.ID
		}
	}
	if !truncated {
		return entries, io.EOF
	}
	return entries, nil
}

// listMerged will list across all sets and return a merged results stream.
// The result channel is closed when no more results are expected.
func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, results chan<- metaCacheEntry) error {
	var mu sync.Mutex
	var wg sync.WaitGroup
	var errs []error
	allAtEOF := true
	var inputs []chan metaCacheEntry
	mu.Lock()
	// Ask all sets and merge entries.
	listCtx, cancelList := context.WithCancel(ctx)
	defer cancelList()
	for _, pool := range z.serverPools {
		for _, set := range pool.sets {
			wg.Add(1)
			innerResults := make(chan metaCacheEntry, 100)
			inputs = append(inputs, innerResults)
			go func(i int, set *erasureObjects) {
				defer wg.Done()
				err := set.listPath(listCtx, o, innerResults)
				mu.Lock()
				defer mu.Unlock()
				if err == nil {
					allAtEOF = false
				}
				errs[i] = err
			}(len(errs), set)
			errs = append(errs, nil)
		}
	}
	mu.Unlock()

	// Gather results to a single channel.
	// Quorum is one since we are merging across sets.
	err := mergeEntryChannels(ctx, inputs, results, 1)

	cancelList()
	wg.Wait()

	// Evaluate the errors gathered from each set.
	if isAllNotFound(errs) {
		if isAllVolumeNotFound(errs) {
			return errVolumeNotFound
		}
		return nil
	}

	if err != nil {
		return err
	}

	if contextCanceled(ctx) {
		return ctx.Err()
	}

	for _, err := range errs {
		if errors.Is(err, io.EOF) {
			continue
		}
		if err == nil || contextCanceled(ctx) || errors.Is(err, context.Canceled) {
			allAtEOF = false
			continue
		}
		logger.LogIf(ctx, err)
		return err
	}
	if allAtEOF {
		return io.EOF
	}
	return nil
}

// triggerExpiryAndRepl applies lifecycle and replication actions on the listing.
// It returns true if the listing is non-versioned and the given object is expired.
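// Expired versions are queued for deletion and remaining versions are queued
// for replication healing where applicable.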
func triggerExpiryAndRepl(ctx context.Context, o listPathOptions, obj metaCacheEntry) (skip bool) {
	versioned := o.Versioning != nil && o.Versioning.Versioned(obj.name)

	// Skip the latest object from the listing only for regular
	// ListObjects() calls; version-based listings cannot filter out
	// individual versions, so 'obj' cannot be truncated in such a manner.
	if !o.Versioned && !o.V1 {
		fi, err := obj.fileInfo(o.Bucket)
		if err != nil {
			return
		}
		objInfo := fi.ToObjectInfo(o.Bucket, obj.name, versioned)
		if o.Lifecycle != nil {
			act := evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo).Action
			skip = act.Delete() && !act.DeleteRestored()
		}
	}

	fiv, err := obj.fileInfoVersions(o.Bucket)
	if err != nil {
		return
	}

	// Expire all versions if needed; if not, attempt to queue for replication.
	for _, version := range fiv.Versions {
		objInfo := version.ToObjectInfo(o.Bucket, obj.name, versioned)

		if o.Lifecycle != nil {
			evt := evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo)
			if evt.Action.Delete() {
				globalExpiryState.enqueueByDays(objInfo, evt, lcEventSrc_s3ListObjects)
				if !evt.Action.DeleteRestored() {
					continue
				} // Queue version for replication upon expired restored copies if needed.
			}
		}

		queueReplicationHeal(ctx, o.Bucket, objInfo, o.Replication, 0)
	}
	return
}

func (z *erasureServerPools) listAndSave(ctx context.Context, o *listPathOptions) (entries metaCacheEntriesSorted, err error) {
	// Use ID as the object name...
	o.pool = z.getAvailablePoolIdx(ctx, minioMetaBucket, o.ID, 10<<20)
	if o.pool < 0 {
		// No space or similar, don't persist the listing.
		o.pool = 0
		o.Create = false
		o.ID = ""
		o.Transient = true
		return entries, errDiskFull
	}
	o.set = z.serverPools[o.pool].getHashedSetIndex(o.ID)
	saver := z.serverPools[o.pool].sets[o.set]

	// Disconnect from the call above, but cancel on exit.
	listCtx, cancel := context.WithCancel(GlobalContext)
	saveCh := make(chan metaCacheEntry, metacacheBlockSize)
	inCh := make(chan metaCacheEntry, metacacheBlockSize)
	outCh := make(chan metaCacheEntry, o.Limit)

	filteredResults := o.gatherResults(ctx, outCh)

	mc := o.newMetacache()
	meta := metaCacheRPC{meta: &mc, cancel: cancel, rpc: globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix)), o: *o}

	// Save listing...
	go func() {
		if err := saver.saveMetaCacheStream(listCtx, &meta, saveCh); err != nil {
			meta.setErr(err.Error())
		}
		cancel()
	}()

	// Do listing...
	go func(o listPathOptions) {
		err := z.listMerged(listCtx, o, inCh)
		if err != nil {
			meta.setErr(err.Error())
		}
		o.debugln("listAndSave: listing", o.ID, "finished with ", err)
	}(*o)

	// Keep track of when we return since we no longer have to send entries to output.
	var funcReturned bool
	var funcReturnedMu sync.Mutex
	defer func() {
		funcReturnedMu.Lock()
		funcReturned = true
		funcReturnedMu.Unlock()
	}()
	// Write listing to results and saver.
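	// Entries are forwarded to the caller's output channel until this function
	// has returned; after that they are only sent to the metacache saver.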
	go func() {
		var returned bool
		for entry := range inCh {
			if !returned {
				funcReturnedMu.Lock()
				returned = funcReturned
				funcReturnedMu.Unlock()
				outCh <- entry
				if returned {
					xioutil.SafeClose(outCh)
				}
			}
			entry.reusable = returned
			saveCh <- entry
		}
		if !returned {
			xioutil.SafeClose(outCh)
		}
		xioutil.SafeClose(saveCh)
	}()

	return filteredResults()
}