github.com/git-lfs/git-lfs@v2.5.2+incompatible/tools/filetools.go (about) 1 // Package tools contains other helper functions too small to justify their own package 2 // NOTE: Subject to change, do not rely on this package from outside git-lfs source 3 package tools 4 5 import ( 6 "bufio" 7 "encoding/hex" 8 "fmt" 9 "io" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strconv" 14 "strings" 15 "sync" 16 "sync/atomic" 17 18 "github.com/git-lfs/git-lfs/filepathfilter" 19 ) 20 21 // FileOrDirExists determines if a file/dir exists, returns IsDir() results too. 22 func FileOrDirExists(path string) (exists bool, isDir bool) { 23 fi, err := os.Stat(path) 24 if err != nil { 25 return false, false 26 } else { 27 return true, fi.IsDir() 28 } 29 } 30 31 // FileExists determines if a file (NOT dir) exists. 32 func FileExists(path string) bool { 33 ret, isDir := FileOrDirExists(path) 34 return ret && !isDir 35 } 36 37 // DirExists determines if a dir (NOT file) exists. 38 func DirExists(path string) bool { 39 ret, isDir := FileOrDirExists(path) 40 return ret && isDir 41 } 42 43 // FileExistsOfSize determines if a file exists and is of a specific size. 
44 func FileExistsOfSize(path string, sz int64) bool { 45 fi, err := os.Stat(path) 46 47 if err != nil { 48 return false 49 } 50 51 return !fi.IsDir() && fi.Size() == sz 52 } 53 54 // ResolveSymlinks ensures that if the path supplied is a symlink, it is 55 // resolved to the actual concrete path 56 func ResolveSymlinks(path string) string { 57 if len(path) == 0 { 58 return path 59 } 60 61 if resolved, err := filepath.EvalSymlinks(path); err == nil { 62 return resolved 63 } 64 return path 65 } 66 67 // RenameFileCopyPermissions moves srcfile to destfile, replacing destfile if 68 // necessary and also copying the permissions of destfile if it already exists 69 func RenameFileCopyPermissions(srcfile, destfile string) error { 70 info, err := os.Stat(destfile) 71 if os.IsNotExist(err) { 72 // no original file 73 } else if err != nil { 74 return err 75 } else { 76 if err := os.Chmod(srcfile, info.Mode()); err != nil { 77 return fmt.Errorf("can't set filemode on file %q: %v", srcfile, err) 78 } 79 } 80 81 if err := os.Rename(srcfile, destfile); err != nil { 82 return fmt.Errorf("cannot replace %q with %q: %v", destfile, srcfile, err) 83 } 84 return nil 85 } 86 87 // CleanPaths splits the given `paths` argument by the delimiter argument, and 88 // then "cleans" that path according to the path.Clean function (see 89 // https://golang.org/pkg/path#Clean). 90 // Note always cleans to '/' path separators regardless of platform (git friendly) 91 func CleanPaths(paths, delim string) (cleaned []string) { 92 // If paths is an empty string, splitting it will yield [""], which will 93 // become the path ".". To avoid this, bail out if trimmed paths 94 // argument is empty. 95 if paths = strings.TrimSpace(paths); len(paths) == 0 { 96 return 97 } 98 99 for _, part := range strings.Split(paths, delim) { 100 part = strings.TrimSpace(part) 101 102 // Remove trailing `/` or `\`, but only the first one. 
103 for _, sep := range []string{`/`, `\`} { 104 if strings.HasSuffix(part, sep) { 105 part = strings.TrimSuffix(part, sep) 106 break 107 } 108 } 109 110 cleaned = append(cleaned, part) 111 } 112 113 return cleaned 114 } 115 116 // VerifyFileHash reads a file and verifies whether the SHA is correct 117 // Returns an error if there is a problem 118 func VerifyFileHash(oid, path string) error { 119 f, err := os.Open(path) 120 if err != nil { 121 return err 122 } 123 defer f.Close() 124 125 h := NewLfsContentHash() 126 _, err = io.Copy(h, f) 127 if err != nil { 128 return err 129 } 130 131 calcOid := hex.EncodeToString(h.Sum(nil)) 132 if calcOid != oid { 133 return fmt.Errorf("File %q has an invalid hash %s, expected %s", path, calcOid, oid) 134 } 135 136 return nil 137 } 138 139 // FastWalkCallback is the signature for the callback given to FastWalkGitRepo() 140 type FastWalkCallback func(parentDir string, info os.FileInfo, err error) 141 142 // FastWalkGitRepo is a more optimal implementation of filepath.Walk for a Git 143 // repo. The callback guaranteed to be called sequentially. The function returns 144 // once all files and errors have triggered callbacks. 145 // It differs in the following ways: 146 // * Uses goroutines to parallelise large dirs and descent into subdirs 147 // * Does not provide sorted output; parents will always be before children but 148 // there are no other guarantees. 
Use parentDir argument in the callback to 149 // determine absolute path rather than tracking it yourself 150 // * Automatically ignores any .git directories 151 // * Respects .gitignore contents and skips ignored files/dirs 152 // 153 // rootDir - Absolute path to the top of the repository working directory 154 func FastWalkGitRepo(rootDir string, cb FastWalkCallback) { 155 walker := fastWalkWithExcludeFiles(rootDir, ".gitignore") 156 for file := range walker.ch { 157 cb(file.ParentDir, file.Info, file.Err) 158 } 159 } 160 161 // Returned from FastWalk with parent directory context 162 // This is needed because FastWalk can provide paths out of order so the 163 // parent dir cannot be implied 164 type fastWalkInfo struct { 165 ParentDir string 166 Info os.FileInfo 167 Err error 168 } 169 170 type fastWalker struct { 171 rootDir string 172 excludeFilename string 173 ch chan fastWalkInfo 174 limit int32 175 cur *int32 176 wg *sync.WaitGroup 177 } 178 179 // fastWalkWithExcludeFiles walks the contents of a dir, respecting 180 // include/exclude patterns and also loading new exlude patterns from files 181 // named excludeFilename in directories walked 182 // 183 // rootDir - Absolute path to the top of the repository working directory 184 func fastWalkWithExcludeFiles(rootDir, excludeFilename string) *fastWalker { 185 excludePaths := []filepathfilter.Pattern{ 186 filepathfilter.NewPattern(".git"), 187 filepathfilter.NewPattern("**/.git"), 188 } 189 190 limit, _ := strconv.Atoi(os.Getenv("LFS_FASTWALK_LIMIT")) 191 if limit < 1 { 192 limit = runtime.GOMAXPROCS(-1) * 20 193 } 194 195 c := int32(0) 196 w := &fastWalker{ 197 rootDir: rootDir, 198 excludeFilename: excludeFilename, 199 limit: int32(limit), 200 cur: &c, 201 ch: make(chan fastWalkInfo, 256), 202 wg: &sync.WaitGroup{}, 203 } 204 205 go func() { 206 dirFi, err := os.Stat(w.rootDir) 207 if err != nil { 208 w.ch <- fastWalkInfo{Err: err} 209 return 210 } 211 212 w.Walk(true, "", dirFi, excludePaths) 213 w.Wait() 
214 }() 215 return w 216 } 217 218 // Walk is the main recursive implementation of fast walk. 219 // Sends the file/dir and any contents to the channel so long as it passes the 220 // include/exclude filter. If a dir, parses any excludeFilename found and updates 221 // the excludePaths with its content before (parallel) recursing into contents 222 // Also splits large directories into multiple goroutines. 223 // Increments waitg.Add(1) for each new goroutine launched internally 224 // 225 // workDir - Relative path inside the repository 226 func (w *fastWalker) Walk(isRoot bool, workDir string, itemFi os.FileInfo, 227 excludePaths []filepathfilter.Pattern) { 228 229 var fullPath string // Absolute path to the current file or dir 230 var parentWorkDir string // Absolute path to the workDir inside the repository 231 if isRoot { 232 fullPath = w.rootDir 233 } else { 234 parentWorkDir = join(w.rootDir, workDir) 235 fullPath = join(parentWorkDir, itemFi.Name()) 236 } 237 238 workPath := join(workDir, itemFi.Name()) 239 if !filepathfilter.NewFromPatterns(nil, excludePaths).Allows(workPath) { 240 return 241 } 242 243 w.ch <- fastWalkInfo{ParentDir: parentWorkDir, Info: itemFi} 244 245 if !itemFi.IsDir() { 246 // Nothing more to do if this is not a dir 247 return 248 } 249 250 var childWorkDir string 251 if !isRoot { 252 childWorkDir = join(workDir, itemFi.Name()) 253 } 254 255 if len(w.excludeFilename) > 0 { 256 possibleExcludeFile := join(fullPath, w.excludeFilename) 257 var err error 258 excludePaths, err = loadExcludeFilename(possibleExcludeFile, childWorkDir, excludePaths) 259 if err != nil { 260 w.ch <- fastWalkInfo{Err: err} 261 } 262 } 263 264 // The absolute optimal way to scan would be File.Readdirnames but we 265 // still need the Stat() to know whether something is a dir, so use 266 // File.Readdir instead. Means we can provide os.FileInfo to callers like 267 // filepath.Walk as a bonus. 
268 df, err := os.Open(fullPath) 269 if err != nil { 270 w.ch <- fastWalkInfo{Err: err} 271 return 272 } 273 274 // The number of items in a dir we process in each goroutine 275 jobSize := 100 276 for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) { 277 // Parallelise all dirs, and chop large dirs into batches 278 w.walk(children, func(subitems []os.FileInfo) { 279 for _, childFi := range subitems { 280 w.Walk(false, childWorkDir, childFi, excludePaths) 281 } 282 }) 283 } 284 285 df.Close() 286 if err != nil && err != io.EOF { 287 w.ch <- fastWalkInfo{Err: err} 288 } 289 } 290 291 func (w *fastWalker) walk(children []os.FileInfo, fn func([]os.FileInfo)) { 292 cur := atomic.AddInt32(w.cur, 1) 293 if cur > w.limit { 294 fn(children) 295 atomic.AddInt32(w.cur, -1) 296 return 297 } 298 299 w.wg.Add(1) 300 go func() { 301 fn(children) 302 w.wg.Done() 303 atomic.AddInt32(w.cur, -1) 304 }() 305 } 306 307 func (w *fastWalker) Wait() { 308 w.wg.Wait() 309 close(w.ch) 310 } 311 312 // loadExcludeFilename reads the given file in gitignore format and returns a 313 // revised array of exclude paths if there are any changes. 
314 // If any changes are made a copy of the array is taken so the original is not 315 // modified 316 func loadExcludeFilename(filename, workDir string, excludePaths []filepathfilter.Pattern) ([]filepathfilter.Pattern, error) { 317 f, err := os.OpenFile(filename, os.O_RDONLY, 0644) 318 if err != nil { 319 if os.IsNotExist(err) { 320 return excludePaths, nil 321 } 322 return excludePaths, err 323 } 324 defer f.Close() 325 326 retPaths := excludePaths 327 modified := false 328 329 scanner := bufio.NewScanner(f) 330 for scanner.Scan() { 331 line := strings.TrimSpace(scanner.Text()) 332 // Skip blanks, comments and negations (not supported right now) 333 if len(line) == 0 || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "!") { 334 continue 335 } 336 337 if !modified { 338 // copy on write 339 retPaths = make([]filepathfilter.Pattern, len(excludePaths)) 340 copy(retPaths, excludePaths) 341 modified = true 342 } 343 344 path := line 345 // Add pattern in context if exclude has separator, or no wildcard 346 // Allow for both styles of separator at this point 347 if strings.ContainsAny(path, "/\\") || 348 !strings.Contains(path, "*") { 349 path = join(workDir, line) 350 } 351 retPaths = append(retPaths, filepathfilter.NewPattern(path)) 352 } 353 354 return retPaths, nil 355 } 356 357 func join(paths ...string) string { 358 ne := make([]string, 0, len(paths)) 359 360 for _, p := range paths { 361 if len(p) > 0 { 362 ne = append(ne, p) 363 } 364 } 365 return strings.Join(ne, "/") 366 } 367 368 // SetFileWriteFlag changes write permissions on a file 369 // Used to make a file read-only or not. When writeEnabled = false, the write 370 // bit is removed for all roles. When writeEnabled = true, the behaviour is 371 // different per platform: 372 // On Mac & Linux, the write bit is set only on the owner as per default umask. 373 // All other bits are unaffected. 374 // On Windows, all the write bits are set since Windows doesn't support Unix permissions. 
func SetFileWriteFlag(path string, writeEnabled bool) error {
	const (
		ownerWrite = 0200 // write bit for the owner only
		anyWrite   = 0222 // write bits for owner, group and other
	)

	info, err := os.Stat(path)
	if err != nil {
		return err
	}
	bits := uint32(info.Mode())

	if writeEnabled {
		if bits&ownerWrite != 0 {
			// Owner can already write: no change needed.
			return nil
		}
		// Set owner write only; Go's own Chmod makes Windows set all though.
		return os.Chmod(path, os.FileMode(bits|ownerWrite))
	}

	if bits&anyWrite == 0 {
		// Already read-only for everyone: no change needed.
		return nil
	}
	// Disable write for all roles.
	return os.Chmod(path, os.FileMode(bits&^anyWrite))
}