
     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  package cmd
    20  import (
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"os"
    26  	pathutil "path"
    27  	"strings"
    28  	"sync"
    29  	"time"
    31  	xioutil ""
    32  	""
    33  )
    35  func renameAllBucketMetacache(epPath string) error {
    36  	// Rename all previous `.minio.sys/buckets/<bucketname>/.metacache` to
    37  	// to `.minio.sys/tmp/` for deletion.
    38  	return readDirFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error {
    39  		if typ == os.ModeDir {
    40  			tmpMetacacheOld := pathutil.Join(epPath, minioMetaTmpDeletedBucket, mustGetUUID())
    41  			if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)),
    42  				tmpMetacacheOld, epPath); err != nil && err != errFileNotFound {
    43  				return fmt.Errorf("unable to rename (%s -> %s) %w",
    44  					pathJoin(epPath, minioMetaBucket+metacachePrefixForID(minioMetaBucket, slashSeparator)),
    45  					tmpMetacacheOld,
    46  					osErrToFileErr(err))
    47  			}
    48  		}
    49  		return nil
    50  	})
    51  }
    53  // listPath will return the requested entries.
    54  // If no more entries are in the listing io.EOF is returned,
    55  // otherwise nil or an unexpected error is returned.
    56  // The listPathOptions given will be checked and modified internally.
    57  // Required important fields are Bucket, Prefix, Separator.
    58  // Other important fields are Limit, Marker.
    59  // List ID always derived from the Marker.
    60  func (z *erasureServerPools) listPath(ctx context.Context, o *listPathOptions) (entries metaCacheEntriesSorted, err error) {
    61  	if err := checkListObjsArgs(ctx, o.Bucket, o.Prefix, o.Marker); err != nil {
    62  		return entries, err
    63  	}
    65  	// Marker points to before the prefix, just ignore it.
    66  	if o.Marker < o.Prefix {
    67  		o.Marker = ""
    68  	}
    70  	// Marker is set validate pre-condition.
    71  	if o.Marker != "" && o.Prefix != "" {
    72  		// Marker not common with prefix is not implemented. Send an empty response
    73  		if !HasPrefix(o.Marker, o.Prefix) {
    74  			return entries, io.EOF
    75  		}
    76  	}
    78  	// With max keys of zero we have reached eof, return right here.
    79  	if o.Limit == 0 {
    80  		return entries, io.EOF
    81  	}
    83  	// For delimiter and prefix as '/' we do not list anything at all
    84  	// along // with the prefix. On a flat namespace with 'prefix'
    85  	// as '/' we don't have any entries, since all the keys are
    86  	// of form 'keyName/...'
    87  	if strings.HasPrefix(o.Prefix, SlashSeparator) {
    88  		return entries, io.EOF
    89  	}
    91  	// If delimiter is slashSeparator we must return directories of
    92  	// the non-recursive scan unless explicitly requested.
    93  	o.IncludeDirectories = o.Separator == slashSeparator
    94  	if (o.Separator == slashSeparator || o.Separator == "") && !o.Recursive {
    95  		o.Recursive = o.Separator != slashSeparator
    96  		o.Separator = slashSeparator
    97  	} else {
    98  		// Default is recursive, if delimiter is set then list non recursive.
    99  		o.Recursive = true
   100  	}
   102  	// Decode and get the optional list id from the marker.
   103  	o.parseMarker()
   104  	if o.BaseDir == "" {
   105  		o.BaseDir = baseDirFromPrefix(o.Prefix)
   106  	}
   107  	o.Transient = o.Transient || isReservedOrInvalidBucket(o.Bucket, false)
   108  	o.SetFilter()
   109  	if o.Transient {
   110  		o.Create = false
   111  	}
   113  	// We have 2 cases:
   114  	// 1) Cold listing, just list.
   115  	// 2) Returning, but with no id. Start async listing.
   116  	// 3) Returning, with ID, stream from list.
   117  	//
   118  	// If we don't have a list id we must ask the server if it has a cache or create a new.
   119  	if o.ID != "" && !o.Transient {
   120  		// Create or ping with handout...
   121  		rpc := globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix))
   122  		var c *metacache
   123  		if rpc == nil {
   124  			resp := localMetacacheMgr.getBucket(ctx, o.Bucket).findCache(*o)
   125  			c = &resp
   126  		} else {
   127  			rctx, cancel := context.WithTimeout(ctx, 5*time.Second)
   128  			c, err = rpc.GetMetacacheListing(rctx, *o)
   129  			cancel()
   130  		}
   131  		if err != nil {
   132  			if errors.Is(err, context.Canceled) {
   133  				// Context is canceled, return at once.
   134  				// request canceled, no entries to return
   135  				return entries, io.EOF
   136  			}
   137  			if !errors.Is(err, context.DeadlineExceeded) {
   138  				// Report error once per bucket, but continue listing.
   139  				logger.LogOnceIf(ctx, err, "GetMetacacheListing:"+o.Bucket)
   140  			}
   141  			o.Transient = true
   142  			o.Create = false
   143  			o.ID = mustGetUUID()
   144  		} else {
   145  			if c.fileNotFound {
   146  				// No cache found, no entries found.
   147  				return entries, io.EOF
   148  			}
   149  			if c.status == scanStateError || c.status == scanStateNone {
   150  				o.ID = ""
   151  				o.Create = false
   152  				o.debugln("scan status", c.status, " - waiting a roundtrip to create")
   153  			} else {
   154  				// Continue listing
   155  				o.ID =
   156  				go func(meta metacache) {
   157  					// Continuously update while we wait.
   158  					t := time.NewTicker(metacacheMaxClientWait / 10)
   159  					defer t.Stop()
   160  					select {
   161  					case <-ctx.Done():
   162  						// Request is done, stop updating.
   163  						return
   164  					case <-t.C:
   165  						meta.lastHandout = time.Now()
   166  						meta, _ = rpc.UpdateMetacacheListing(ctx, meta)
   167  					}
   168  				}(*c)
   169  			}
   170  		}
   171  	}
   173  	if o.ID != "" && !o.Transient {
   174  		// We have an existing list ID, continue streaming.
   175  		if o.Create {
   176  			o.debugln("Creating", o)
   177  			entries, err = z.listAndSave(ctx, o)
   178  			if err == nil || err == io.EOF {
   179  				return entries, err
   180  			}
   181  			entries.truncate(0)
   182  		} else {
   183  			if o.pool < len(z.serverPools) && o.set < len(z.serverPools[o.pool].sets) {
   184  				o.debugln("Resuming", o)
   185  				entries, err = z.serverPools[o.pool].sets[o.set].streamMetadataParts(ctx, *o)
   186  				entries.reuse = true // We read from stream and are not sharing results.
   187  				if err == nil {
   188  					return entries, nil
   189  				}
   190  			} else {
   191  				err = fmt.Errorf("invalid pool/set")
   192  				o.pool, o.set = 0, 0
   193  			}
   194  		}
   195  		if IsErr(err, []error{
   196  			nil,
   197  			context.Canceled,
   198  			context.DeadlineExceeded,
   199  			// io.EOF is expected and should be returned but no need to log it.
   200  			io.EOF,
   201  		}...) {
   202  			// Expected good errors we don't need to return error.
   203  			return entries, err
   204  		}
   205  		entries.truncate(0)
   206  		go func() {
   207  			rpc := globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix))
   208  			if rpc != nil {
   209  				ctx, cancel := context.WithTimeout(GlobalContext, 5*time.Second)
   210  				defer cancel()
   211  				c, err := rpc.GetMetacacheListing(ctx, *o)
   212  				if err == nil {
   213  					c.error = "no longer used"
   214  					c.status = scanStateError
   215  					rpc.UpdateMetacacheListing(ctx, *c)
   216  				}
   217  			}
   218  		}()
   219  		o.ID = ""
   220  	}
   222  	// Do listing in-place.
   223  	// Create output for our results.
   224  	// Create filter for results.
   225  	o.debugln("Raw List", o)
   226  	filterCh := make(chan metaCacheEntry, o.Limit)
   227  	listCtx, cancelList := context.WithCancel(ctx)
   228  	filteredResults := o.gatherResults(listCtx, filterCh)
   229  	var wg sync.WaitGroup
   230  	wg.Add(1)
   231  	var listErr error
   233  	go func(o listPathOptions) {
   234  		defer wg.Done()
   235  		o.StopDiskAtLimit = true
   236  		listErr = z.listMerged(listCtx, o, filterCh)
   237  		o.debugln("listMerged returned with", listErr)
   238  	}(*o)
   240  	entries, err = filteredResults()
   241  	cancelList()
   242  	wg.Wait()
   243  	if listErr != nil && !errors.Is(listErr, context.Canceled) {
   244  		return entries, listErr
   245  	}
   246  	entries.reuse = true
   247  	truncated := entries.len() > o.Limit || err == nil
   248  	entries.truncate(o.Limit)
   249  	if !o.Transient && truncated {
   250  		if o.ID == "" {
   251  			entries.listID = mustGetUUID()
   252  		} else {
   253  			entries.listID = o.ID
   254  		}
   255  	}
   256  	if !truncated {
   257  		return entries, io.EOF
   258  	}
   259  	return entries, nil
   260  }
   262  // listMerged will list across all sets and return a merged results stream.
   263  // The result channel is closed when no more results are expected.
   264  func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, results chan<- metaCacheEntry) error {
   265  	var mu sync.Mutex
   266  	var wg sync.WaitGroup
   267  	var errs []error
   268  	allAtEOF := true
   269  	var inputs []chan metaCacheEntry
   270  	mu.Lock()
   271  	// Ask all sets and merge entries.
   272  	listCtx, cancelList := context.WithCancel(ctx)
   273  	defer cancelList()
   274  	for _, pool := range z.serverPools {
   275  		for _, set := range pool.sets {
   276  			wg.Add(1)
   277  			innerResults := make(chan metaCacheEntry, 100)
   278  			inputs = append(inputs, innerResults)
   279  			go func(i int, set *erasureObjects) {
   280  				defer wg.Done()
   281  				err := set.listPath(listCtx, o, innerResults)
   282  				mu.Lock()
   283  				defer mu.Unlock()
   284  				if err == nil {
   285  					allAtEOF = false
   286  				}
   287  				errs[i] = err
   288  			}(len(errs), set)
   289  			errs = append(errs, nil)
   290  		}
   291  	}
   292  	mu.Unlock()
   294  	// Gather results to a single channel.
   295  	// Quorum is one since we are merging across sets.
   296  	err := mergeEntryChannels(ctx, inputs, results, 1)
   298  	cancelList()
   299  	wg.Wait()
   301  	// we should return 'errs' from per disk
   302  	if isAllNotFound(errs) {
   303  		if isAllVolumeNotFound(errs) {
   304  			return errVolumeNotFound
   305  		}
   306  		return nil
   307  	}
   309  	if err != nil {
   310  		return err
   311  	}
   313  	if contextCanceled(ctx) {
   314  		return ctx.Err()
   315  	}
   317  	for _, err := range errs {
   318  		if errors.Is(err, io.EOF) {
   319  			continue
   320  		}
   321  		if err == nil || contextCanceled(ctx) || errors.Is(err, context.Canceled) {
   322  			allAtEOF = false
   323  			continue
   324  		}
   325  		logger.LogIf(ctx, err)
   326  		return err
   327  	}
   328  	if allAtEOF {
   329  		return io.EOF
   330  	}
   331  	return nil
   332  }
   334  // triggerExpiryAndRepl applies lifecycle and replication actions on the listing
   335  // It returns true if the listing is non-versioned and the given object is expired.
   336  func triggerExpiryAndRepl(ctx context.Context, o listPathOptions, obj metaCacheEntry) (skip bool) {
   337  	versioned := o.Versioning != nil && o.Versioning.Versioned(
   339  	// skip latest object from listing only for regular
   340  	// listObjects calls, versioned based listing cannot
   341  	// filter out between versions 'obj' cannot be truncated
   342  	// in such a manner, so look for skipping an object only
   343  	// for regular ListObjects() call only.
   344  	if !o.Versioned && !o.V1 {
   345  		fi, err := obj.fileInfo(o.Bucket)
   346  		if err != nil {
   347  			return
   348  		}
   349  		objInfo := fi.ToObjectInfo(o.Bucket,, versioned)
   350  		if o.Lifecycle != nil {
   351  			act := evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo).Action
   352  			skip = act.Delete() && !act.DeleteRestored()
   353  		}
   354  	}
   356  	fiv, err := obj.fileInfoVersions(o.Bucket)
   357  	if err != nil {
   358  		return
   359  	}
   361  	// Expire all versions if needed, if not attempt to queue for replication.
   362  	for _, version := range fiv.Versions {
   363  		objInfo := version.ToObjectInfo(o.Bucket,, versioned)
   365  		if o.Lifecycle != nil {
   366  			evt := evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo)
   367  			if evt.Action.Delete() {
   368  				globalExpiryState.enqueueByDays(objInfo, evt, lcEventSrc_s3ListObjects)
   369  				if !evt.Action.DeleteRestored() {
   370  					continue
   371  				} // queue version for replication upon expired restored copies if needed.
   372  			}
   373  		}
   375  		queueReplicationHeal(ctx, o.Bucket, objInfo, o.Replication, 0)
   376  	}
   377  	return
   378  }
// listAndSave runs a merged listing for o, streams every listed entry to a
// metacache saver on one erasure set and, at the same time, returns the
// filtered entries gathered for this request.
//
// o is modified: o.pool and o.set are set to the location chosen for the
// cache. When no pool is available, o is switched to a transient, non-created
// listing with an empty ID and errDiskFull is returned.
//
// Listing and saving run on a context derived from GlobalContext rather than
// ctx, so they are not tied to the lifetime of the calling request.
func (z *erasureServerPools) listAndSave(ctx context.Context, o *listPathOptions) (entries metaCacheEntriesSorted, err error) {
	// Use ID as the object name...
	// NOTE(review): 10<<20 is presumably a size hint (10 MiB) for pool selection — confirm.
	o.pool = z.getAvailablePoolIdx(ctx, minioMetaBucket, o.ID, 10<<20)
	if o.pool < 0 {
		// No space or similar, don't persist the listing.
		o.pool = 0
		o.Create = false
		o.ID = ""
		o.Transient = true
		return entries, errDiskFull
	}
	o.set = z.serverPools[o.pool].getHashedSetIndex(o.ID)
	saver := z.serverPools[o.pool].sets[o.set]
	// Disconnect from call above, but cancel on exit.
	// cancel is invoked when the saver goroutine below finishes, and it is
	// also handed to the metaCacheRPC value.
	listCtx, cancel := context.WithCancel(GlobalContext)
	// saveCh feeds the saver, inCh receives the merged listing and outCh
	// feeds the result gatherer for this request.
	saveCh := make(chan metaCacheEntry, metacacheBlockSize)
	inCh := make(chan metaCacheEntry, metacacheBlockSize)
	outCh := make(chan metaCacheEntry, o.Limit)
	// The gatherer uses the request context, unlike listing and saving.
	filteredResults := o.gatherResults(ctx, outCh)
	mc := o.newMetacache()
	meta := metaCacheRPC{meta: &mc, cancel: cancel, rpc: globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix)), o: *o}
	// Save listing...
	go func() {
		if err := saver.saveMetaCacheStream(listCtx, &meta, saveCh); err != nil {
			meta.setErr(err.Error())
		}
		// Saving is done (or failed): stop the listing as well.
		cancel()
	}()
	// Do listing...
	// The goroutine receives a copy of the options.
	go func(o listPathOptions) {
		err := z.listMerged(listCtx, o, inCh)
		if err != nil {
			meta.setErr(err.Error())
		}
		o.debugln("listAndSave: listing", o.ID, "finished with ", err)
	}(*o)
	// Keep track of when we return since we no longer have to send entries to output.
	var funcReturned bool
	var funcReturnedMu sync.Mutex
	// Runs when this function returns, i.e. after filteredResults() below has
	// produced its result.
	defer func() {
		funcReturnedMu.Lock()
		funcReturned = true
		funcReturnedMu.Unlock()
	}()
	// Write listing to results and saver.
	go func() {
		// returned caches the last observed value of funcReturned; once it is
		// true the output channel is no longer touched.
		var returned bool
		for entry := range inCh {
			if !returned {
				funcReturnedMu.Lock()
				returned = funcReturned
				funcReturnedMu.Unlock()
				// The entry is still sent in the iteration that first
				// observes the return; outCh is closed right after it.
				outCh <- entry
				if returned {
					xioutil.SafeClose(outCh)
				}
			}
			// Entries are flagged reusable only once the caller has returned.
			entry.reusable = returned
			saveCh <- entry
		}
		// Listing ended before the caller returned: close the output here.
		if !returned {
			xioutil.SafeClose(outCh)
		}
		xioutil.SafeClose(saveCh)
	}()
	return filteredResults()
}