storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/metacache-entries.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"bytes"
    21  	"io"
    22  	"os"
    23  	"sort"
    24  	"strings"
    25  )
    26  
    27  // metaCacheEntry is an object or a directory (prefix) within an unknown bucket.
        // The bucket is not stored on the entry; methods that need it take it as an argument.
    28  type metaCacheEntry struct {
    29  	// name is the full name of the object including prefixes.
    30  	name string
    31  	// metadata is the raw, undecoded metadata of the entry.
        	// If none is present the entry is not an object but only a prefix.
    32  	// Entries without metadata will only be present in non-recursive scans.
    33  	metadata []byte
    34  
    35  	// cached contains the decoded metadata, once fileInfo has decoded it successfully.
    36  	cached *FileInfo
    37  }
    38  
    39  // isDir returns if the entry is representing a prefix directory.
    40  func (e metaCacheEntry) isDir() bool {
    41  	return len(e.metadata) == 0
    42  }
    43  
    44  // isObject returns if the entry is representing an object.
    45  func (e metaCacheEntry) isObject() bool {
    46  	return len(e.metadata) > 0
    47  }
    48  
    49  // hasPrefix returns whether an entry has a specific prefix
    50  func (e metaCacheEntry) hasPrefix(s string) bool {
    51  	return strings.HasPrefix(e.name, s)
    52  }
    53  
    54  // likelyMatches returns if the entries match by comparing name and metadata length.
    55  func (e *metaCacheEntry) likelyMatches(other *metaCacheEntry) bool {
    56  	// This should reject 99%
    57  	if len(e.metadata) != len(other.metadata) || e.name != other.name {
    58  		return false
    59  	}
    60  	return true
    61  }
    62  
    63  // matches returns if the entries match by comparing their latest version fileinfo.
    64  func (e *metaCacheEntry) matches(other *metaCacheEntry, bucket string) bool {
    65  	if e == nil && other == nil {
    66  		return true
    67  	}
    68  	if e == nil || other == nil {
    69  		return false
    70  	}
    71  
    72  	// This should reject 99%
    73  	if len(e.metadata) != len(other.metadata) || e.name != other.name {
    74  		return false
    75  	}
    76  
    77  	eFi, eErr := e.fileInfo(bucket)
    78  	oFi, oErr := other.fileInfo(bucket)
    79  	if eErr != nil || oErr != nil {
    80  		return eErr == oErr
    81  	}
    82  
    83  	return eFi.ModTime.Equal(oFi.ModTime) && eFi.Size == oFi.Size && eFi.VersionID == oFi.VersionID
    84  }
    85  
    86  // isInDir returns whether the entry is in the dir when considering the separator.
    87  func (e metaCacheEntry) isInDir(dir, separator string) bool {
    88  	if len(dir) == 0 {
    89  		// Root
    90  		idx := strings.Index(e.name, separator)
    91  		return idx == -1 || idx == len(e.name)-len(separator)
    92  	}
    93  	ext := strings.TrimPrefix(e.name, dir)
    94  	if len(ext) != len(e.name) {
    95  		idx := strings.Index(ext, separator)
    96  		// If separator is not found or is last entry, ok.
    97  		return idx == -1 || idx == len(ext)-len(separator)
    98  	}
    99  	return false
   100  }
   101  
   102  // isLatestDeletemarker returns whether the latest version is a delete marker.
   103  // If metadata is NOT versioned false will always be returned.
   104  // If v2 and UNABLE to load metadata true will be returned.
   105  func (e *metaCacheEntry) isLatestDeletemarker() bool {
   106  	if e.cached != nil {
   107  		return e.cached.Deleted
   108  	}
   109  	if !isXL2V1Format(e.metadata) {
   110  		return false
   111  	}
   112  	var xlMeta xlMetaV2
   113  	if err := xlMeta.Load(e.metadata); err != nil || len(xlMeta.Versions) == 0 {
   114  		return true
   115  	}
   116  	return xlMeta.Versions[len(xlMeta.Versions)-1].Type == DeleteType
   117  }
   118  
   119  // fileInfo returns the decoded metadata.
   120  // If entry is a directory it is returned as that.
   121  // If versioned the latest version will be returned.
   122  func (e *metaCacheEntry) fileInfo(bucket string) (*FileInfo, error) {
   123  	if e.isDir() {
   124  		return &FileInfo{
   125  			Volume: bucket,
   126  			Name:   e.name,
   127  			Mode:   uint32(os.ModeDir),
   128  		}, nil
   129  	}
   130  	if e.cached == nil {
   131  		fi, err := getFileInfo(e.metadata, bucket, e.name, "", false)
   132  		if err != nil {
   133  			return nil, err
   134  		}
   135  		e.cached = &fi
   136  	}
   137  	return e.cached, nil
   138  }
   139  
   140  // fileInfoVersions returns the metadata as FileInfoVersions.
   141  // If entry is a directory it is returned as that.
   142  func (e *metaCacheEntry) fileInfoVersions(bucket string) (FileInfoVersions, error) {
   143  	if e.isDir() {
   144  		return FileInfoVersions{
   145  			Volume: bucket,
   146  			Name:   e.name,
   147  			Versions: []FileInfo{
   148  				{
   149  					Volume: bucket,
   150  					Name:   e.name,
   151  					Mode:   uint32(os.ModeDir),
   152  				},
   153  			},
   154  		}, nil
   155  	}
   156  	return getFileInfoVersions(e.metadata, bucket, e.name)
   157  }
   158  
   159  // metaCacheEntries is a slice of metacache entries.
        // No ordering is implied by the type itself.
   160  type metaCacheEntries []metaCacheEntry
   161  
   162  // less function for sorting.
   163  func (m metaCacheEntries) less(i, j int) bool {
   164  	return m[i].name < m[j].name
   165  }
   166  
   167  // sort entries by name.
   168  // m is sorted and a sorted metadata object is returned.
   169  // Changes to m will also be reflected in the returned object.
   170  func (m metaCacheEntries) sort() metaCacheEntriesSorted {
   171  	if m.isSorted() {
   172  		return metaCacheEntriesSorted{o: m}
   173  	}
   174  	sort.Slice(m, m.less)
   175  	return metaCacheEntriesSorted{o: m}
   176  }
   177  
   178  // isSorted returns whether the objects are sorted.
   179  // This is usually orders of magnitude faster than actually sorting.
   180  func (m metaCacheEntries) isSorted() bool {
   181  	return sort.SliceIsSorted(m, m.less)
   182  }
   183  
   184  // shallowClone will create a shallow clone of the array objects,
   185  // but object metadata will not be cloned.
   186  func (m metaCacheEntries) shallowClone() metaCacheEntries {
   187  	dst := make(metaCacheEntries, len(m))
   188  	copy(dst, m)
   189  	return dst
   190  }
   191  
        // metadataResolutionParams holds the parameters that metaCacheEntries.resolve
        // uses to decide whether an entry exists.
   192  type metadataResolutionParams struct {
   193  	dirQuorum int    // Number of disks needed for a directory to 'exist'.
   194  	objQuorum int    // Number of disks needed for an object to 'exist'.
   195  	bucket    string // Name of the bucket. Used for generating cached fileinfo.
   196  }
   197  
        // resolve picks one entry out of m and reports whether enough entries back it.
        // Presumably m holds one entry per disk for the same path - confirm with callers.
        //
        // Entries with an empty name are ignored. Every directory entry is counted
        // and becomes the selected entry. Object entries are counted when their
        // metadata can be decoded; the first such object becomes the selected entry.
        //
        // The result is (nil, false) when m is empty, when nothing was selected, or
        // when the count for the selected kind is below r.dirQuorum (directories) or
        // r.objQuorum (objects). Otherwise the selected entry, which points into m,
        // is returned together with true.
   198  func (m metaCacheEntries) resolve(r *metadataResolutionParams) (selected *metaCacheEntry, ok bool) {
   199  	if len(m) == 0 {
   200  		return nil, false
   201  	}
   202  
   203  	dirExists := 0
   204  	objExists := 0
        	// selFIV is only used as a marker that an object has been selected.
   205  	var selFIV *FileInfo
   206  	for i := range m {
   207  		entry := &m[i]
   208  		if entry.name == "" {
   209  			continue
   210  		}
   211  		if entry.isDir() {
        			// NOTE(review): a directory replaces whatever was selected before,
        			// including an object - confirm this is intended.
   212  			dirExists++
   213  			selected = entry
   214  			continue
   215  		}
   216  
   217  		// Decode the entry metadata; entries that fail to decode are not counted.
   218  		fiv, err := entry.fileInfo(r.bucket)
   219  		if err != nil {
   220  			continue
   221  		}
   222  
   223  		objExists++
   224  		if selFIV == nil {
   225  			selected = entry
   226  			selFIV = fiv
   227  			continue
   228  		}
   229  
        		// NOTE(review): the outcome of matches is not acted on; both branches
        		// move on to the next entry, so objects that differ from the selected
        		// one still count towards objExists - confirm this is intended.
   230  		if selected.matches(entry, r.bucket) {
   231  			continue
   232  		}
   233  	}
   234  
   235  	if selected == nil {
   236  		return nil, false
   237  	}
   238  
        	// Apply the quorum that belongs to the kind of entry selected.
   239  	if selected.isDir() && dirExists < r.dirQuorum {
   240  		return nil, false
   241  	} else if !selected.isDir() && objExists < r.objQuorum {
   242  		return nil, false
   243  	}
   244  	return selected, true
   245  }
   246  
   247  // firstFound returns the first found and the number of set entries.
   248  func (m metaCacheEntries) firstFound() (first *metaCacheEntry, n int) {
   249  	for _, entry := range m {
   250  		if entry.name != "" {
   251  			n++
   252  			if first == nil {
   253  				first = &entry
   254  			}
   255  		}
   256  	}
   257  	return first, n
   258  }
   259  
   260  // names will return all names in order.
   261  // Since this allocates it should not be used in critical functions.
   262  func (m metaCacheEntries) names() []string {
   263  	res := make([]string, 0, len(m))
   264  	for _, obj := range m {
   265  		res = append(res, obj.name)
   266  	}
   267  	return res
   268  }
   269  
   270  // metaCacheEntriesSorted contains metacache entries that are sorted by name.
   271  type metaCacheEntriesSorted struct {
        	// o holds the entries.
   272  	o metaCacheEntries
   273  	// listID is not serialized.
   274  	listID string
   275  }
   276  
   277  // writeTo will write all objects to the provided output.
   278  func (m metaCacheEntriesSorted) writeTo(writer io.Writer) error {
   279  	w := newMetacacheWriter(writer, 1<<20)
   280  	if err := w.write(m.o...); err != nil {
   281  		w.Close()
   282  		return err
   283  	}
   284  	return w.Close()
   285  }
   286  
   287  // shallowClone will create a shallow clone of the array objects,
   288  // but object metadata will not be cloned.
   289  func (m metaCacheEntriesSorted) shallowClone() metaCacheEntriesSorted {
   290  	// We have value receiver so we already have a copy.
   291  	m.o = m.o.shallowClone()
   292  	return m
   293  }
   294  
   295  // iterate the entries in order.
   296  // If the iterator function returns iterating stops.
   297  func (m *metaCacheEntriesSorted) iterate(fn func(entry metaCacheEntry) (cont bool)) {
   298  	if m == nil {
   299  		return
   300  	}
   301  	for _, o := range m.o {
   302  		if !fn(o) {
   303  			return
   304  		}
   305  	}
   306  }
   307  
   308  // fileInfoVersions converts the metadata to FileInfoVersions where possible.
   309  // Metadata that cannot be decoded is skipped.
   310  func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, afterV string) (versions []ObjectInfo) {
   311  	versions = make([]ObjectInfo, 0, m.len())
   312  	prevPrefix := ""
   313  	for _, entry := range m.o {
   314  		if entry.isObject() {
   315  			if delimiter != "" {
   316  				idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter)
   317  				if idx >= 0 {
   318  					idx = len(prefix) + idx + len(delimiter)
   319  					currPrefix := entry.name[:idx]
   320  					if currPrefix == prevPrefix {
   321  						continue
   322  					}
   323  					prevPrefix = currPrefix
   324  					versions = append(versions, ObjectInfo{
   325  						IsDir:  true,
   326  						Bucket: bucket,
   327  						Name:   currPrefix,
   328  					})
   329  					continue
   330  				}
   331  			}
   332  
   333  			fiv, err := entry.fileInfoVersions(bucket)
   334  			if err != nil {
   335  				continue
   336  			}
   337  
   338  			fiVersions := fiv.Versions
   339  			if afterV != "" {
   340  				vidMarkerIdx := fiv.findVersionIndex(afterV)
   341  				if vidMarkerIdx >= 0 {
   342  					fiVersions = fiVersions[vidMarkerIdx+1:]
   343  				}
   344  				afterV = ""
   345  			}
   346  
   347  			for _, version := range fiVersions {
   348  				versions = append(versions, version.ToObjectInfo(bucket, entry.name))
   349  			}
   350  
   351  			continue
   352  		}
   353  
   354  		if entry.isDir() {
   355  			if delimiter == "" {
   356  				continue
   357  			}
   358  			idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter)
   359  			if idx < 0 {
   360  				continue
   361  			}
   362  			idx = len(prefix) + idx + len(delimiter)
   363  			currPrefix := entry.name[:idx]
   364  			if currPrefix == prevPrefix {
   365  				continue
   366  			}
   367  			prevPrefix = currPrefix
   368  			versions = append(versions, ObjectInfo{
   369  				IsDir:  true,
   370  				Bucket: bucket,
   371  				Name:   currPrefix,
   372  			})
   373  		}
   374  	}
   375  
   376  	return versions
   377  }
   378  
   379  // fileInfoVersions converts the metadata to FileInfoVersions where possible.
   380  // Metadata that cannot be decoded is skipped.
   381  func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (objects []ObjectInfo) {
   382  	objects = make([]ObjectInfo, 0, m.len())
   383  	prevPrefix := ""
   384  	for _, entry := range m.o {
   385  		if entry.isObject() {
   386  			if delimiter != "" {
   387  				idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter)
   388  				if idx >= 0 {
   389  					idx = len(prefix) + idx + len(delimiter)
   390  					currPrefix := entry.name[:idx]
   391  					if currPrefix == prevPrefix {
   392  						continue
   393  					}
   394  					prevPrefix = currPrefix
   395  					objects = append(objects, ObjectInfo{
   396  						IsDir:  true,
   397  						Bucket: bucket,
   398  						Name:   currPrefix,
   399  					})
   400  					continue
   401  				}
   402  			}
   403  
   404  			fi, err := entry.fileInfo(bucket)
   405  			if err == nil {
   406  				objects = append(objects, fi.ToObjectInfo(bucket, entry.name))
   407  			}
   408  			continue
   409  		}
   410  		if entry.isDir() {
   411  			if delimiter == "" {
   412  				continue
   413  			}
   414  			idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter)
   415  			if idx < 0 {
   416  				continue
   417  			}
   418  			idx = len(prefix) + idx + len(delimiter)
   419  			currPrefix := entry.name[:idx]
   420  			if currPrefix == prevPrefix {
   421  				continue
   422  			}
   423  			prevPrefix = currPrefix
   424  			objects = append(objects, ObjectInfo{
   425  				IsDir:  true,
   426  				Bucket: bucket,
   427  				Name:   currPrefix,
   428  			})
   429  		}
   430  	}
   431  
   432  	return objects
   433  }
   434  
   435  // forwardTo will truncate m so only entries that are s or after is in the list.
   436  func (m *metaCacheEntriesSorted) forwardTo(s string) {
   437  	if s == "" {
   438  		return
   439  	}
   440  	idx := sort.Search(len(m.o), func(i int) bool {
   441  		return m.o[i].name >= s
   442  	})
   443  	m.o = m.o[idx:]
   444  }
   445  
   446  // forwardPast will truncate m so only entries that are after s is in the list.
   447  func (m *metaCacheEntriesSorted) forwardPast(s string) {
   448  	if s == "" {
   449  		return
   450  	}
   451  	idx := sort.Search(len(m.o), func(i int) bool {
   452  		return m.o[i].name > s
   453  	})
   454  	m.o = m.o[idx:]
   455  }
   456  
   457  // merge will merge other into m.
   458  // If the same entries exists in both and metadata matches only one is added,
   459  // otherwise the entry from m will be placed first.
   460  // Operation time is expected to be O(n+m).
   461  func (m *metaCacheEntriesSorted) merge(other metaCacheEntriesSorted, limit int) {
   462  	merged := make(metaCacheEntries, 0, m.len()+other.len())
   463  	a := m.entries()
   464  	b := other.entries()
   465  	for len(a) > 0 && len(b) > 0 {
   466  		if a[0].name == b[0].name && bytes.Equal(a[0].metadata, b[0].metadata) {
   467  			// Same, discard one.
   468  			merged = append(merged, a[0])
   469  			a = a[1:]
   470  			b = b[1:]
   471  		} else if a[0].name < b[0].name {
   472  			merged = append(merged, a[0])
   473  			a = a[1:]
   474  		} else {
   475  			merged = append(merged, b[0])
   476  			b = b[1:]
   477  		}
   478  		if limit > 0 && len(merged) >= limit {
   479  			break
   480  		}
   481  	}
   482  	// Append anything left.
   483  	if limit < 0 || len(merged) < limit {
   484  		merged = append(merged, a...)
   485  		merged = append(merged, b...)
   486  	}
   487  	m.o = merged
   488  }
   489  
   490  // filter allows selective filtering with the provided function.
   491  func (m *metaCacheEntriesSorted) filter(fn func(entry *metaCacheEntry) bool) {
   492  	dst := m.o[:0]
   493  	for _, o := range m.o {
   494  		if fn(&o) {
   495  			dst = append(dst, o)
   496  		}
   497  	}
   498  	m.o = dst
   499  }
   500  
   501  // filterPrefix will filter m to only contain entries with the specified prefix.
   502  func (m *metaCacheEntriesSorted) filterPrefix(s string) {
   503  	if s == "" {
   504  		return
   505  	}
   506  	m.forwardTo(s)
   507  	for i, o := range m.o {
   508  		if !o.hasPrefix(s) {
   509  			m.o = m.o[:i]
   510  			break
   511  		}
   512  	}
   513  }
   514  
   515  // filterObjectsOnly will remove prefix directories.
   516  // Order is preserved, but the underlying slice is modified.
   517  func (m *metaCacheEntriesSorted) filterObjectsOnly() {
   518  	dst := m.o[:0]
   519  	for _, o := range m.o {
   520  		if !o.isDir() {
   521  			dst = append(dst, o)
   522  		}
   523  	}
   524  	m.o = dst
   525  }
   526  
   527  // filterPrefixesOnly will remove objects.
   528  // Order is preserved, but the underlying slice is modified.
   529  func (m *metaCacheEntriesSorted) filterPrefixesOnly() {
   530  	dst := m.o[:0]
   531  	for _, o := range m.o {
   532  		if o.isDir() {
   533  			dst = append(dst, o)
   534  		}
   535  	}
   536  	m.o = dst
   537  }
   538  
   539  // filterRecursiveEntries will keep entries only with the prefix that doesn't contain separator.
   540  // This can be used to remove recursive listings.
   541  // To return root elements only set prefix to an empty string.
   542  // Order is preserved, but the underlying slice is modified.
   543  func (m *metaCacheEntriesSorted) filterRecursiveEntries(prefix, separator string) {
   544  	dst := m.o[:0]
   545  	if prefix != "" {
   546  		m.forwardTo(prefix)
   547  		for _, o := range m.o {
   548  			ext := strings.TrimPrefix(o.name, prefix)
   549  			if len(ext) != len(o.name) {
   550  				if !strings.Contains(ext, separator) {
   551  					dst = append(dst, o)
   552  				}
   553  			}
   554  		}
   555  	} else {
   556  		// No prefix, simpler
   557  		for _, o := range m.o {
   558  			if !strings.Contains(o.name, separator) {
   559  				dst = append(dst, o)
   560  			}
   561  		}
   562  	}
   563  	m.o = dst
   564  }
   565  
   566  // truncate the number of entries to maximum n.
   567  func (m *metaCacheEntriesSorted) truncate(n int) {
   568  	if m == nil {
   569  		return
   570  	}
   571  	if len(m.o) > n {
   572  		m.o = m.o[:n]
   573  	}
   574  }
   575  
   576  // len returns the number of objects and prefix dirs in m.
   577  func (m *metaCacheEntriesSorted) len() int {
   578  	if m == nil {
   579  		return 0
   580  	}
   581  	return len(m.o)
   582  }
   583  
   584  // entries returns the underlying objects as is currently represented.
   585  func (m *metaCacheEntriesSorted) entries() metaCacheEntries {
   586  	if m == nil {
   587  		return nil
   588  	}
   589  	return m.o
   590  }
   591  
   592  // deduplicate entries in the list.
   593  // If compareMeta is set it will be used to resolve conflicts.
   594  // The function should return whether the existing entry should be replaced with other.
   595  // If no compareMeta is provided duplicates may be left.
   596  // This is indicated by the returned boolean.
   597  func (m *metaCacheEntriesSorted) deduplicate(compareMeta func(existing, other *metaCacheEntry) (replace bool)) (dupesLeft bool) {
   598  	dst := m.o[:0]
   599  	for j := range m.o {
   600  		found := false
   601  		obj := &m.o[j]
   602  		for i := len(dst) - 1; i >= 0; i++ {
   603  			existing := &dst[i]
   604  			if existing.name != obj.name {
   605  				break
   606  			}
   607  
   608  			// Use given resolution function first if any.
   609  			if compareMeta != nil {
   610  				if compareMeta(existing, obj) {
   611  					dst[i] = *obj
   612  				}
   613  				found = true
   614  				break
   615  			}
   616  			if obj.likelyMatches(existing) {
   617  				found = true
   618  				break
   619  			}
   620  
   621  			// Matches, move on.
   622  			dupesLeft = true
   623  			continue
   624  		}
   625  		if !found {
   626  			dst = append(dst, *obj)
   627  		}
   628  	}
   629  	m.o = dst
   630  	return dupesLeft
   631  }