storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/metacache.go

/*
 * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"errors"
	"fmt"
	"path"
	"strings"
	"time"

	"storj.io/minio/cmd/logger"
)

type scanStatus uint8

const (
	scanStateNone scanStatus = iota
	scanStateStarted
	scanStateSuccess
	scanStateError

	// Time in which the initiator of a scan must have reported back.
	metacacheMaxRunningAge = time.Minute

	// metacacheBlockSize is the number of file/directory entries to have in each block.
	metacacheBlockSize = 5000

	// metacacheSharePrefix controls whether prefixes on dirty paths are always shared.
	// This will make `test/a` and `test/b` share listings if they are concurrent.
	// Enabling this will make cache sharing more likely and cause less IO,
	// but may cause additional latency to some calls.
	metacacheSharePrefix = false
)

//go:generate msgp -file $GOFILE -unexported
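// msgp generates MessagePack (de)serialization methods for the unexported types in this file.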

// metacache contains a tracked cache entry.
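// It records the parameters of a bucket listing (root, filter, recursive) along with
// its scan status, timestamps and stream version, and is serialized with msgp.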
type metacache struct {
	id           string     `msg:"id"`   // unique ID of this cache listing
	bucket       string     `msg:"b"`    // bucket the listing belongs to
	root         string     `msg:"root"` // base directory of the listing
	recursive    bool       `msg:"rec"`  // whether the listing is recursive
	filter       string     `msg:"flt"`  // prefix filter applied to the listing, if any
	status       scanStatus `msg:"stat"` // current scan state
	fileNotFound bool       `msg:"fnf"`  // set if the listed path was not found
	error        string     `msg:"err"`  // error message if the scan failed
	started      time.Time  `msg:"st"`   // when the scan started
	ended        time.Time  `msg:"end"`  // when the scan ended
	lastUpdate   time.Time  `msg:"u"`    // last time the scan initiator reported progress
	lastHandout  time.Time  `msg:"lh"`   // last time the cache was handed out to a lister
	startedCycle uint64     `msg:"stc"`  // scanner cycle when the scan started
	endedCycle   uint64     `msg:"endc"` // scanner cycle when the scan ended
	dataVersion  uint8      `msg:"v"`    // version of the serialized cache stream
}

// finished returns whether the scan has completed, i.e. the ended timestamp is set.
func (m *metacache) finished() bool {
	return !m.ended.IsZero()
}

// matches returns whether the metacache matches the options given.
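// The extend duration allows a finished cache whose cycle is older than requested
// to remain usable for a limited grace period.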
func (m *metacache) matches(o *listPathOptions, extend time.Duration) bool {
	if o == nil {
		return false
	}

	// Never return caches that errored, never started, or were written with a different stream version.
	if m.status == scanStateError || m.status == scanStateNone || m.dataVersion != metacacheStreamVersion {
		o.debugf("cache %s state or stream version mismatch", m.id)
		return false
	}
	if m.startedCycle < o.OldestCycle {
		o.debugf("cache %s cycle too old", m.id)
		return false
	}

	// The requested base dir must be within the cached root.
	if !strings.HasPrefix(o.BaseDir, m.root) {
		o.debugf("cache %s prefix mismatch, cached:%v, want:%v", m.id, m.root, o.BaseDir)
		return false
	}
	if m.filter != "" && strings.HasPrefix(m.filter, o.FilterPrefix) {
		o.debugf("cache %s cannot be used because of filter %s", m.id, m.filter)
		return false
	}

	if o.Recursive && !m.recursive {
		o.debugf("cache %s not recursive", m.id)
		// If this is recursive the cached listing must be as well.
		return false
	}
	if o.Separator != slashSeparator && !m.recursive {
		o.debugf("cache %s not slashsep and not recursive", m.id)
		// Non slash separator requires recursive.
		return false
	}
	if !m.finished() && time.Since(m.lastUpdate) > metacacheMaxRunningAge {
		o.debugf("cache %s not running, time: %v", m.id, time.Since(m.lastUpdate))
		// Abandoned
		return false
	}

	if m.finished() && m.endedCycle <= o.OldestCycle {
		if extend <= 0 {
			// Without an extension, a finished scan must have ended after the oldest requested cycle.
			o.debugf("cache %s ended and cycle (%v) <= oldest allowed (%v)", m.id, m.endedCycle, o.OldestCycle)
			return false
		}
		if time.Since(m.lastUpdate) > metacacheMaxRunningAge+extend {
			// The cache ended too long ago, even allowing for the extension.
			o.debugf("cache %s ended (%v) and beyond extended life (%v)", m.id, m.lastUpdate, metacacheMaxRunningAge+extend)
			return false
		}
	}

	return true
}

// worthKeeping indicates if the cache by itself is worth keeping.
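// currentCycle is the latest scanner cycle; finished caches whose start cycle lags it
// by more than dataUsageUpdateDirCycles are considered stale.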
func (m *metacache) worthKeeping(currentCycle uint64) bool {
	if m == nil {
		return false
	}
	cache := m
	switch {
	case !cache.finished() && time.Since(cache.lastUpdate) > metacacheMaxRunningAge:
		// Not finished and no update for metacacheMaxRunningAge, discard it.
		return false
	case cache.finished() && cache.startedCycle > currentCycle:
		// Start cycle is somehow newer than the current cycle.
		return false
	case cache.finished() && time.Since(cache.lastHandout) > 48*time.Hour:
		// Keep only for 2 days. Fallback if scanner is clogged.
		return false
	case cache.finished() && currentCycle >= dataUsageUpdateDirCycles && cache.startedCycle < currentCycle-dataUsageUpdateDirCycles:
		// Cycle is too old to be valuable.
		return false
	case cache.status == scanStateError || cache.status == scanStateNone:
		// Remove failed listings after 5 minutes.
		return time.Since(cache.lastUpdate) < 5*time.Minute
	}
	return true
}

// canBeReplacedBy returns whether this cache can be discarded in favor of 'other'.
// Both must pass the worthKeeping check.
func (m *metacache) canBeReplacedBy(other *metacache) bool {
	// If the other cache is older, or is the same cache, it can never replace this one.
	if other.started.Before(m.started) || m.id == other.id {
		return false
	}
	if other.status == scanStateNone || other.status == scanStateError {
		return false
	}
	if m.status == scanStateStarted && time.Since(m.lastUpdate) < metacacheMaxRunningAge {
		return false
	}

	// Keep it around a bit longer.
	if time.Since(m.lastHandout) < 30*time.Minute || time.Since(m.lastUpdate) < metacacheMaxRunningAge {
		return false
	}

	// Go through recursive combinations.
	switch {
	case !m.recursive && !other.recursive:
		// If neither is recursive, the roots must match.
		return m.root == other.root && strings.HasPrefix(m.filter, other.filter)
	case m.recursive && !other.recursive:
		// A recursive listing can never be replaced by a non-recursive one.
		return false
	case !m.recursive && other.recursive:
		// If the other is recursive, it must contain this root and be unfiltered.
		return strings.HasPrefix(m.root, other.root) && other.filter == ""
	case m.recursive && other.recursive:
		// Similarly if both are recursive.
		return strings.HasPrefix(m.root, other.root) && other.filter == ""
	}
	panic("should be unreachable")
}

// baseDirFromPrefix will return the base directory given an object path.
// For example, an object with name prefix/folder/object.ext will return `prefix/folder/`.
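// A name without any slash separator, e.g. object.ext, returns the empty string.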
func baseDirFromPrefix(prefix string) string {
	b := path.Dir(prefix)
	// path.Dir returns "." (or a bare separator) when there is no parent directory; normalize to empty.
	if b == "." || b == "./" || b == "/" {
		b = ""
	}
	// A prefix without any separator has no base directory.
	if !strings.Contains(prefix, slashSeparator) {
		b = ""
	}
	// Ensure a non-empty base directory ends with the separator.
	if len(b) > 0 && !strings.HasSuffix(b, slashSeparator) {
		b += slashSeparator
	}
	return b
}

// update cache with new status.
// The updates are conditional so multiple callers can update with different states.
func (m *metacache) update(update metacache) {
	m.lastUpdate = UTCNow()

	// A successful completion of a running scan records the end time and cycle.
	if m.status == scanStateStarted && update.status == scanStateSuccess {
		m.ended = UTCNow()
		m.endedCycle = update.endedCycle
	}

	// Only a running scan can transition to another state.
	if m.status == scanStateStarted && update.status != scanStateStarted {
		m.status = update.status
	}

	// The first reported error wins and marks the scan as failed.
	if m.error == "" && update.error != "" {
		m.error = update.error
		m.status = scanStateError
		m.ended = UTCNow()
	}
	m.fileNotFound = m.fileNotFound || update.fileNotFound
}

// delete all cache data on disks.
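// All cache blocks are stored as objects under metacachePrefixForID(bucket, id)
// in minioMetaBucket and are removed in one sweep.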
func (m *metacache) delete(ctx context.Context) {
	if m.bucket == "" || m.id == "" {
		// An empty bucket or id is logged, but deletion is still attempted below.
		logger.LogIf(ctx, fmt.Errorf("metacache.delete: bucket (%s) or id (%s) empty", m.bucket, m.id))
	}
	objAPI := newObjectLayerFn()
	if objAPI == nil {
		logger.LogIf(ctx, errors.New("metacache.delete: no object layer"))
		return
	}
	ez, ok := objAPI.(*erasureServerPools)
	if !ok {
		logger.LogIf(ctx, errors.New("metacache.delete: expected objAPI to be *erasureServerPools"))
		return
	}
	ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(m.bucket, m.id))
}