storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/tree-walk.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2016 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package cmd 18 19 import ( 20 "context" 21 "sort" 22 "strings" 23 ) 24 25 // TreeWalkResult - Tree walk result carries results of tree walking. 26 type TreeWalkResult struct { 27 entry string 28 isEmptyDir bool 29 end bool 30 } 31 32 // Return entries that have prefix prefixEntry. 33 // The supplied entries are modified and the returned string is a subslice of entries. 34 func filterMatchingPrefix(entries []string, prefixEntry string) []string { 35 if len(entries) == 0 || prefixEntry == "" { 36 return entries 37 } 38 // Write to the beginning of entries. 39 dst := entries[:0] 40 for _, s := range entries { 41 if !HasPrefix(s, prefixEntry) { 42 continue 43 } 44 dst = append(dst, s) 45 } 46 return dst 47 } 48 49 // xl.ListDir returns entries with trailing "/" for directories. At the object layer 50 // we need to remove this trailing "/" for objects and retain "/" for prefixes before 51 // sorting because the trailing "/" can affect the sorting results for certain cases. 52 // Ex. lets say entries = ["a-b/", "a/"] and both are objects. 53 // sorting with out trailing "/" = ["a", "a-b"] 54 // sorting with trailing "/" = ["a-b/", "a/"] 55 // Hence if entries[] does not have a case like the above example then isLeaf() check 56 // can be delayed till the entry is pushed into the TreeWalkResult channel. 57 // delayIsLeafCheck() returns true if isLeaf can be delayed or false if 58 // isLeaf should be done in listDir() 59 func delayIsLeafCheck(entries []string) bool { 60 for i, entry := range entries { 61 if HasSuffix(entry, globalDirSuffixWithSlash) { 62 return false 63 } 64 if i == len(entries)-1 { 65 break 66 } 67 // If any byte in the "entry" string is less than '/' then the 68 // next "entry" should not contain '/' at the same same byte position. 69 for j := 0; j < len(entry); j++ { 70 if entry[j] < '/' { 71 if len(entries[i+1]) > j { 72 if entries[i+1][j] == '/' { 73 return false 74 } 75 } 76 } 77 } 78 } 79 return true 80 } 81 82 // ListDirFunc - "listDir" function of type listDirFunc returned by listDirFactory() - explained below. 83 type ListDirFunc func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string, delayIsLeaf bool) 84 85 // IsLeafFunc - A function isLeaf of type isLeafFunc is used to detect if an 86 // entry is a leaf entry. There are 2 scenarios where isLeaf should behave 87 // differently depending on the backend: 88 // 1. FS backend object listing - isLeaf is true if the entry 89 // has no trailing "/" 90 // 2. Erasure backend object listing - isLeaf is true if the entry 91 // is a directory and contains xl.meta 92 type IsLeafFunc func(string, string) bool 93 94 // IsLeafDirFunc - A function isLeafDir of type isLeafDirFunc is used to detect 95 // if an entry is empty directory. 96 type IsLeafDirFunc func(string, string) bool 97 98 func filterListEntries(bucket, prefixDir string, entries []string, prefixEntry string, isLeaf IsLeafFunc) ([]string, bool) { 99 // Filter entries that have the prefix prefixEntry. 100 entries = filterMatchingPrefix(entries, prefixEntry) 101 102 // Listing needs to be sorted. 103 sort.Slice(entries, func(i, j int) bool { 104 if !HasSuffix(entries[i], globalDirSuffixWithSlash) && !HasSuffix(entries[j], globalDirSuffixWithSlash) { 105 return entries[i] < entries[j] 106 } 107 first := entries[i] 108 second := entries[j] 109 if HasSuffix(first, globalDirSuffixWithSlash) { 110 first = strings.TrimSuffix(first, globalDirSuffixWithSlash) + slashSeparator 111 } 112 if HasSuffix(second, globalDirSuffixWithSlash) { 113 second = strings.TrimSuffix(second, globalDirSuffixWithSlash) + slashSeparator 114 } 115 return first < second 116 }) 117 118 // Can isLeaf() check be delayed till when it has to be sent down the 119 // TreeWalkResult channel? 120 delayIsLeaf := delayIsLeafCheck(entries) 121 if delayIsLeaf { 122 return entries, true 123 } 124 125 // isLeaf() check has to happen here so that trailing "/" for objects can be removed. 126 for i, entry := range entries { 127 if isLeaf(bucket, pathJoin(prefixDir, entry)) { 128 entries[i] = strings.TrimSuffix(entry, slashSeparator) 129 } 130 } 131 132 // Sort again after removing trailing "/" for objects as the previous sort 133 // does not hold good anymore. 134 sort.Slice(entries, func(i, j int) bool { 135 if !HasSuffix(entries[i], globalDirSuffix) && !HasSuffix(entries[j], globalDirSuffix) { 136 return entries[i] < entries[j] 137 } 138 first := entries[i] 139 second := entries[j] 140 if HasSuffix(first, globalDirSuffix) { 141 first = strings.TrimSuffix(first, globalDirSuffix) + slashSeparator 142 } 143 if HasSuffix(second, globalDirSuffix) { 144 second = strings.TrimSuffix(second, globalDirSuffix) + slashSeparator 145 } 146 if first == second { 147 return HasSuffix(entries[i], globalDirSuffix) 148 } 149 return first < second 150 }) 151 return entries, false 152 } 153 154 // treeWalk walks directory tree recursively pushing TreeWalkResult into the channel as and when it encounters files. 155 func doTreeWalk(ctx context.Context, bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, listDir ListDirFunc, isLeaf IsLeafFunc, isLeafDir IsLeafDirFunc, resultCh chan TreeWalkResult, endWalkCh <-chan struct{}, isEnd bool) (emptyDir bool, treeErr error) { 156 // Example: 157 // if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively 158 // called with prefixDir="one/two/three/four/" and marker="five.txt" 159 160 var markerBase, markerDir string 161 if marker != "" { 162 // Ex: if marker="four/five.txt", markerDir="four/" markerBase="five.txt" 163 markerSplit := strings.SplitN(marker, SlashSeparator, 2) 164 markerDir = markerSplit[0] 165 if len(markerSplit) == 2 { 166 markerDir += SlashSeparator 167 markerBase = markerSplit[1] 168 } 169 } 170 171 emptyDir, entries, delayIsLeaf := listDir(bucket, prefixDir, entryPrefixMatch) 172 // When isleaf check is delayed, make sure that it is set correctly here. 173 if delayIsLeaf && isLeaf == nil || isLeafDir == nil { 174 return false, errInvalidArgument 175 } 176 177 // For an empty list return right here. 178 if emptyDir { 179 return true, nil 180 } 181 182 // example: 183 // If markerDir="four/" Search() returns the index of "four/" in the sorted 184 // entries list so we skip all the entries till "four/" 185 idx := sort.Search(len(entries), func(i int) bool { 186 return entries[i] >= markerDir 187 }) 188 entries = entries[idx:] 189 // For an empty list after search through the entries, return right here. 190 if len(entries) == 0 { 191 return false, nil 192 } 193 194 for i, entry := range entries { 195 var leaf, leafDir bool 196 197 // Decision to do isLeaf check was pushed from listDir() to here. 198 if delayIsLeaf { 199 leaf = isLeaf(bucket, pathJoin(prefixDir, entry)) 200 if leaf { 201 entry = strings.TrimSuffix(entry, slashSeparator) 202 } 203 } else { 204 leaf = !HasSuffix(entry, slashSeparator) 205 } 206 207 if HasSuffix(entry, slashSeparator) { 208 leafDir = isLeafDir(bucket, pathJoin(prefixDir, entry)) 209 } 210 211 isDir := !leafDir && !leaf 212 213 if i == 0 && markerDir == entry { 214 if !recursive { 215 // Skip as the marker would already be listed in the previous listing. 216 continue 217 } 218 if recursive && !isDir { 219 // We should not skip for recursive listing and if markerDir is a directory 220 // for ex. if marker is "four/five.txt" markerDir will be "four/" which 221 // should not be skipped, instead it will need to be treeWalk()'ed into. 222 223 // Skip if it is a file though as it would be listed in previous listing. 224 continue 225 } 226 } 227 if recursive && isDir { 228 // If the entry is a directory, we will need recurse into it. 229 markerArg := "" 230 if entry == markerDir { 231 // We need to pass "five.txt" as marker only if we are 232 // recursing into "four/" 233 markerArg = markerBase 234 } 235 prefixMatch := "" // Valid only for first level treeWalk and empty for subdirectories. 236 // markIsEnd is passed to this entry's treeWalk() so that treeWalker.end can be marked 237 // true at the end of the treeWalk stream. 238 markIsEnd := i == len(entries)-1 && isEnd 239 emptyDir, err := doTreeWalk(ctx, bucket, pathJoin(prefixDir, entry), prefixMatch, markerArg, recursive, 240 listDir, isLeaf, isLeafDir, resultCh, endWalkCh, markIsEnd) 241 if err != nil { 242 return false, err 243 } 244 245 // A nil totalFound means this is an empty directory that 246 // needs to be sent to the result channel, otherwise continue 247 // to the next entry. 248 if !emptyDir { 249 continue 250 } 251 } 252 253 // EOF is set if we are at last entry and the caller indicated we at the end. 254 isEOF := ((i == len(entries)-1) && isEnd) 255 select { 256 case <-endWalkCh: 257 return false, errWalkAbort 258 case resultCh <- TreeWalkResult{entry: pathJoin(prefixDir, entry), isEmptyDir: leafDir, end: isEOF}: 259 } 260 } 261 262 // Everything is listed. 263 return false, nil 264 } 265 266 // Initiate a new treeWalk in a goroutine. 267 func startTreeWalk(ctx context.Context, bucket, prefix, marker string, recursive bool, listDir ListDirFunc, isLeaf IsLeafFunc, isLeafDir IsLeafDirFunc, endWalkCh <-chan struct{}) chan TreeWalkResult { 268 // Example 1 269 // If prefix is "one/two/three/" and marker is "one/two/three/four/five.txt" 270 // treeWalk is called with prefixDir="one/two/three/" and marker="four/five.txt" 271 // and entryPrefixMatch="" 272 273 // Example 2 274 // if prefix is "one/two/th" and marker is "one/two/three/four/five.txt" 275 // treeWalk is called with prefixDir="one/two/" and marker="three/four/five.txt" 276 // and entryPrefixMatch="th" 277 278 resultCh := make(chan TreeWalkResult, maxObjectList) 279 entryPrefixMatch := prefix 280 prefixDir := "" 281 lastIndex := strings.LastIndex(prefix, SlashSeparator) 282 if lastIndex != -1 { 283 entryPrefixMatch = prefix[lastIndex+1:] 284 prefixDir = prefix[:lastIndex+1] 285 } 286 marker = strings.TrimPrefix(marker, prefixDir) 287 go func() { 288 isEnd := true // Indication to start walking the tree with end as true. 289 doTreeWalk(ctx, bucket, prefixDir, entryPrefixMatch, marker, recursive, listDir, isLeaf, isLeafDir, resultCh, endWalkCh, isEnd) 290 close(resultCh) 291 }() 292 return resultCh 293 }