storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/tree-walk.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2016 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"context"
    21  	"sort"
    22  	"strings"
    23  )
    24  
// TreeWalkResult - Tree walk result carries results of tree walking.
// doTreeWalk sends one value on the result channel for every entry it emits.
type TreeWalkResult struct {
	entry      string // path of the emitted entry: the walked prefixDir joined with the entry name
	isEmptyDir bool   // outcome of the isLeafDir check; stays false for entries without a trailing "/"
	end        bool   // true when this is the last entry of a walk level that was invoked with isEnd=true
}
    31  
    32  // Return entries that have prefix prefixEntry.
    33  // The supplied entries are modified and the returned string is a subslice of entries.
    34  func filterMatchingPrefix(entries []string, prefixEntry string) []string {
    35  	if len(entries) == 0 || prefixEntry == "" {
    36  		return entries
    37  	}
    38  	// Write to the beginning of entries.
    39  	dst := entries[:0]
    40  	for _, s := range entries {
    41  		if !HasPrefix(s, prefixEntry) {
    42  			continue
    43  		}
    44  		dst = append(dst, s)
    45  	}
    46  	return dst
    47  }
    48  
    49  // xl.ListDir returns entries with trailing "/" for directories. At the object layer
    50  // we need to remove this trailing "/" for objects and retain "/" for prefixes before
    51  // sorting because the trailing "/" can affect the sorting results for certain cases.
    52  // Ex. lets say entries = ["a-b/", "a/"] and both are objects.
    53  //     sorting with out trailing "/" = ["a", "a-b"]
    54  //     sorting with trailing "/"     = ["a-b/", "a/"]
    55  // Hence if entries[] does not have a case like the above example then isLeaf() check
    56  // can be delayed till the entry is pushed into the TreeWalkResult channel.
    57  // delayIsLeafCheck() returns true if isLeaf can be delayed or false if
    58  // isLeaf should be done in listDir()
    59  func delayIsLeafCheck(entries []string) bool {
    60  	for i, entry := range entries {
    61  		if HasSuffix(entry, globalDirSuffixWithSlash) {
    62  			return false
    63  		}
    64  		if i == len(entries)-1 {
    65  			break
    66  		}
    67  		// If any byte in the "entry" string is less than '/' then the
    68  		// next "entry" should not contain '/' at the same same byte position.
    69  		for j := 0; j < len(entry); j++ {
    70  			if entry[j] < '/' {
    71  				if len(entries[i+1]) > j {
    72  					if entries[i+1][j] == '/' {
    73  						return false
    74  					}
    75  				}
    76  			}
    77  		}
    78  	}
    79  	return true
    80  }
    81  
// ListDirFunc - "listDir" function of type listDirFunc returned by listDirFactory().
// NOTE(review): listDirFactory() is not defined in this file.
//
// doTreeWalk calls it as listDir(bucket, prefixDir, entryPrefixMatch) and uses the results as follows:
// emptyDir=true ends the walk of prefixDir and reports it as empty to the caller,
// entries is binary-searched for the marker and is therefore expected to be sorted,
// and delayIsLeaf=true makes doTreeWalk run the isLeaf check on every entry itself.
type ListDirFunc func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string, delayIsLeaf bool)
    84  
// IsLeafFunc - A function isLeaf of type isLeafFunc is used to detect if an
// entry is a leaf entry. Within this file it is called with the bucket name
// and the entry path (prefixDir joined with the entry name); when it returns
// true the trailing "/" is trimmed from the entry.
// There are 2 scenarios where isLeaf should behave differently depending on
// the backend:
//  1. FS backend object listing - isLeaf is true if the entry
//     has no trailing "/"
//  2. Erasure backend object listing - isLeaf is true if the entry
//     is a directory and contains xl.meta
type IsLeafFunc func(string, string) bool
    93  
// IsLeafDirFunc - A function isLeafDir of type isLeafDirFunc is used to detect
// if an entry is an empty directory. doTreeWalk calls it with the bucket name
// and the entry path (prefixDir joined with the entry name), and only for
// entries that still end with "/" once the isLeaf check has been applied.
type IsLeafDirFunc func(string, string) bool
    97  
    98  func filterListEntries(bucket, prefixDir string, entries []string, prefixEntry string, isLeaf IsLeafFunc) ([]string, bool) {
    99  	// Filter entries that have the prefix prefixEntry.
   100  	entries = filterMatchingPrefix(entries, prefixEntry)
   101  
   102  	// Listing needs to be sorted.
   103  	sort.Slice(entries, func(i, j int) bool {
   104  		if !HasSuffix(entries[i], globalDirSuffixWithSlash) && !HasSuffix(entries[j], globalDirSuffixWithSlash) {
   105  			return entries[i] < entries[j]
   106  		}
   107  		first := entries[i]
   108  		second := entries[j]
   109  		if HasSuffix(first, globalDirSuffixWithSlash) {
   110  			first = strings.TrimSuffix(first, globalDirSuffixWithSlash) + slashSeparator
   111  		}
   112  		if HasSuffix(second, globalDirSuffixWithSlash) {
   113  			second = strings.TrimSuffix(second, globalDirSuffixWithSlash) + slashSeparator
   114  		}
   115  		return first < second
   116  	})
   117  
   118  	// Can isLeaf() check be delayed till when it has to be sent down the
   119  	// TreeWalkResult channel?
   120  	delayIsLeaf := delayIsLeafCheck(entries)
   121  	if delayIsLeaf {
   122  		return entries, true
   123  	}
   124  
   125  	// isLeaf() check has to happen here so that trailing "/" for objects can be removed.
   126  	for i, entry := range entries {
   127  		if isLeaf(bucket, pathJoin(prefixDir, entry)) {
   128  			entries[i] = strings.TrimSuffix(entry, slashSeparator)
   129  		}
   130  	}
   131  
   132  	// Sort again after removing trailing "/" for objects as the previous sort
   133  	// does not hold good anymore.
   134  	sort.Slice(entries, func(i, j int) bool {
   135  		if !HasSuffix(entries[i], globalDirSuffix) && !HasSuffix(entries[j], globalDirSuffix) {
   136  			return entries[i] < entries[j]
   137  		}
   138  		first := entries[i]
   139  		second := entries[j]
   140  		if HasSuffix(first, globalDirSuffix) {
   141  			first = strings.TrimSuffix(first, globalDirSuffix) + slashSeparator
   142  		}
   143  		if HasSuffix(second, globalDirSuffix) {
   144  			second = strings.TrimSuffix(second, globalDirSuffix) + slashSeparator
   145  		}
   146  		if first == second {
   147  			return HasSuffix(entries[i], globalDirSuffix)
   148  		}
   149  		return first < second
   150  	})
   151  	return entries, false
   152  }
   153  
   154  // treeWalk walks directory tree recursively pushing TreeWalkResult into the channel as and when it encounters files.
   155  func doTreeWalk(ctx context.Context, bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, listDir ListDirFunc, isLeaf IsLeafFunc, isLeafDir IsLeafDirFunc, resultCh chan TreeWalkResult, endWalkCh <-chan struct{}, isEnd bool) (emptyDir bool, treeErr error) {
   156  	// Example:
   157  	// if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively
   158  	// called with prefixDir="one/two/three/four/" and marker="five.txt"
   159  
   160  	var markerBase, markerDir string
   161  	if marker != "" {
   162  		// Ex: if marker="four/five.txt", markerDir="four/" markerBase="five.txt"
   163  		markerSplit := strings.SplitN(marker, SlashSeparator, 2)
   164  		markerDir = markerSplit[0]
   165  		if len(markerSplit) == 2 {
   166  			markerDir += SlashSeparator
   167  			markerBase = markerSplit[1]
   168  		}
   169  	}
   170  
   171  	emptyDir, entries, delayIsLeaf := listDir(bucket, prefixDir, entryPrefixMatch)
   172  	// When isleaf check is delayed, make sure that it is set correctly here.
   173  	if delayIsLeaf && isLeaf == nil || isLeafDir == nil {
   174  		return false, errInvalidArgument
   175  	}
   176  
   177  	// For an empty list return right here.
   178  	if emptyDir {
   179  		return true, nil
   180  	}
   181  
   182  	// example:
   183  	// If markerDir="four/" Search() returns the index of "four/" in the sorted
   184  	// entries list so we skip all the entries till "four/"
   185  	idx := sort.Search(len(entries), func(i int) bool {
   186  		return entries[i] >= markerDir
   187  	})
   188  	entries = entries[idx:]
   189  	// For an empty list after search through the entries, return right here.
   190  	if len(entries) == 0 {
   191  		return false, nil
   192  	}
   193  
   194  	for i, entry := range entries {
   195  		var leaf, leafDir bool
   196  
   197  		// Decision to do isLeaf check was pushed from listDir() to here.
   198  		if delayIsLeaf {
   199  			leaf = isLeaf(bucket, pathJoin(prefixDir, entry))
   200  			if leaf {
   201  				entry = strings.TrimSuffix(entry, slashSeparator)
   202  			}
   203  		} else {
   204  			leaf = !HasSuffix(entry, slashSeparator)
   205  		}
   206  
   207  		if HasSuffix(entry, slashSeparator) {
   208  			leafDir = isLeafDir(bucket, pathJoin(prefixDir, entry))
   209  		}
   210  
   211  		isDir := !leafDir && !leaf
   212  
   213  		if i == 0 && markerDir == entry {
   214  			if !recursive {
   215  				// Skip as the marker would already be listed in the previous listing.
   216  				continue
   217  			}
   218  			if recursive && !isDir {
   219  				// We should not skip for recursive listing and if markerDir is a directory
   220  				// for ex. if marker is "four/five.txt" markerDir will be "four/" which
   221  				// should not be skipped, instead it will need to be treeWalk()'ed into.
   222  
   223  				// Skip if it is a file though as it would be listed in previous listing.
   224  				continue
   225  			}
   226  		}
   227  		if recursive && isDir {
   228  			// If the entry is a directory, we will need recurse into it.
   229  			markerArg := ""
   230  			if entry == markerDir {
   231  				// We need to pass "five.txt" as marker only if we are
   232  				// recursing into "four/"
   233  				markerArg = markerBase
   234  			}
   235  			prefixMatch := "" // Valid only for first level treeWalk and empty for subdirectories.
   236  			// markIsEnd is passed to this entry's treeWalk() so that treeWalker.end can be marked
   237  			// true at the end of the treeWalk stream.
   238  			markIsEnd := i == len(entries)-1 && isEnd
   239  			emptyDir, err := doTreeWalk(ctx, bucket, pathJoin(prefixDir, entry), prefixMatch, markerArg, recursive,
   240  				listDir, isLeaf, isLeafDir, resultCh, endWalkCh, markIsEnd)
   241  			if err != nil {
   242  				return false, err
   243  			}
   244  
   245  			// A nil totalFound means this is an empty directory that
   246  			// needs to be sent to the result channel, otherwise continue
   247  			// to the next entry.
   248  			if !emptyDir {
   249  				continue
   250  			}
   251  		}
   252  
   253  		// EOF is set if we are at last entry and the caller indicated we at the end.
   254  		isEOF := ((i == len(entries)-1) && isEnd)
   255  		select {
   256  		case <-endWalkCh:
   257  			return false, errWalkAbort
   258  		case resultCh <- TreeWalkResult{entry: pathJoin(prefixDir, entry), isEmptyDir: leafDir, end: isEOF}:
   259  		}
   260  	}
   261  
   262  	// Everything is listed.
   263  	return false, nil
   264  }
   265  
   266  // Initiate a new treeWalk in a goroutine.
   267  func startTreeWalk(ctx context.Context, bucket, prefix, marker string, recursive bool, listDir ListDirFunc, isLeaf IsLeafFunc, isLeafDir IsLeafDirFunc, endWalkCh <-chan struct{}) chan TreeWalkResult {
   268  	// Example 1
   269  	// If prefix is "one/two/three/" and marker is "one/two/three/four/five.txt"
   270  	// treeWalk is called with prefixDir="one/two/three/" and marker="four/five.txt"
   271  	// and entryPrefixMatch=""
   272  
   273  	// Example 2
   274  	// if prefix is "one/two/th" and marker is "one/two/three/four/five.txt"
   275  	// treeWalk is called with prefixDir="one/two/" and marker="three/four/five.txt"
   276  	// and entryPrefixMatch="th"
   277  
   278  	resultCh := make(chan TreeWalkResult, maxObjectList)
   279  	entryPrefixMatch := prefix
   280  	prefixDir := ""
   281  	lastIndex := strings.LastIndex(prefix, SlashSeparator)
   282  	if lastIndex != -1 {
   283  		entryPrefixMatch = prefix[lastIndex+1:]
   284  		prefixDir = prefix[:lastIndex+1]
   285  	}
   286  	marker = strings.TrimPrefix(marker, prefixDir)
   287  	go func() {
   288  		isEnd := true // Indication to start walking the tree with end as true.
   289  		doTreeWalk(ctx, bucket, prefixDir, entryPrefixMatch, marker, recursive, listDir, isLeaf, isLeafDir, resultCh, endWalkCh, isEnd)
   290  		close(resultCh)
   291  	}()
   292  	return resultCh
   293  }