github.com/x-oss-byte/git-lfs@v2.5.2+incompatible/tools/filetools.go (about)

     1  // Package tools contains other helper functions too small to justify their own package
     2  // NOTE: Subject to change, do not rely on this package from outside git-lfs source
     3  package tools
     4  
     5  import (
     6  	"bufio"
     7  	"encoding/hex"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"sync/atomic"
    17  
    18  	"github.com/git-lfs/git-lfs/filepathfilter"
    19  )
    20  
    21  // FileOrDirExists determines if a file/dir exists, returns IsDir() results too.
    22  func FileOrDirExists(path string) (exists bool, isDir bool) {
    23  	fi, err := os.Stat(path)
    24  	if err != nil {
    25  		return false, false
    26  	} else {
    27  		return true, fi.IsDir()
    28  	}
    29  }
    30  
    31  // FileExists determines if a file (NOT dir) exists.
    32  func FileExists(path string) bool {
    33  	ret, isDir := FileOrDirExists(path)
    34  	return ret && !isDir
    35  }
    36  
    37  // DirExists determines if a dir (NOT file) exists.
    38  func DirExists(path string) bool {
    39  	ret, isDir := FileOrDirExists(path)
    40  	return ret && isDir
    41  }
    42  
    43  // FileExistsOfSize determines if a file exists and is of a specific size.
    44  func FileExistsOfSize(path string, sz int64) bool {
    45  	fi, err := os.Stat(path)
    46  
    47  	if err != nil {
    48  		return false
    49  	}
    50  
    51  	return !fi.IsDir() && fi.Size() == sz
    52  }
    53  
    54  // ResolveSymlinks ensures that if the path supplied is a symlink, it is
    55  // resolved to the actual concrete path
    56  func ResolveSymlinks(path string) string {
    57  	if len(path) == 0 {
    58  		return path
    59  	}
    60  
    61  	if resolved, err := filepath.EvalSymlinks(path); err == nil {
    62  		return resolved
    63  	}
    64  	return path
    65  }
    66  
    67  // RenameFileCopyPermissions moves srcfile to destfile, replacing destfile if
    68  // necessary and also copying the permissions of destfile if it already exists
    69  func RenameFileCopyPermissions(srcfile, destfile string) error {
    70  	info, err := os.Stat(destfile)
    71  	if os.IsNotExist(err) {
    72  		// no original file
    73  	} else if err != nil {
    74  		return err
    75  	} else {
    76  		if err := os.Chmod(srcfile, info.Mode()); err != nil {
    77  			return fmt.Errorf("can't set filemode on file %q: %v", srcfile, err)
    78  		}
    79  	}
    80  
    81  	if err := os.Rename(srcfile, destfile); err != nil {
    82  		return fmt.Errorf("cannot replace %q with %q: %v", destfile, srcfile, err)
    83  	}
    84  	return nil
    85  }
    86  
    87  // CleanPaths splits the given `paths` argument by the delimiter argument, and
    88  // then "cleans" that path according to the path.Clean function (see
    89  // https://golang.org/pkg/path#Clean).
    90  // Note always cleans to '/' path separators regardless of platform (git friendly)
    91  func CleanPaths(paths, delim string) (cleaned []string) {
    92  	// If paths is an empty string, splitting it will yield [""], which will
    93  	// become the path ".". To avoid this, bail out if trimmed paths
    94  	// argument is empty.
    95  	if paths = strings.TrimSpace(paths); len(paths) == 0 {
    96  		return
    97  	}
    98  
    99  	for _, part := range strings.Split(paths, delim) {
   100  		part = strings.TrimSpace(part)
   101  
   102  		// Remove trailing `/` or `\`, but only the first one.
   103  		for _, sep := range []string{`/`, `\`} {
   104  			if strings.HasSuffix(part, sep) {
   105  				part = strings.TrimSuffix(part, sep)
   106  				break
   107  			}
   108  		}
   109  
   110  		cleaned = append(cleaned, part)
   111  	}
   112  
   113  	return cleaned
   114  }
   115  
   116  // VerifyFileHash reads a file and verifies whether the SHA is correct
   117  // Returns an error if there is a problem
   118  func VerifyFileHash(oid, path string) error {
   119  	f, err := os.Open(path)
   120  	if err != nil {
   121  		return err
   122  	}
   123  	defer f.Close()
   124  
   125  	h := NewLfsContentHash()
   126  	_, err = io.Copy(h, f)
   127  	if err != nil {
   128  		return err
   129  	}
   130  
   131  	calcOid := hex.EncodeToString(h.Sum(nil))
   132  	if calcOid != oid {
   133  		return fmt.Errorf("File %q has an invalid hash %s, expected %s", path, calcOid, oid)
   134  	}
   135  
   136  	return nil
   137  }
   138  
// FastWalkCallback is the signature for the callback given to FastWalkGitRepo().
// It is invoked once per result produced by the walk:
//   - parentDir is the parent directory of the reported item (empty for the
//     root directory itself and for error-only results)
//   - info describes the file or directory (nil for error-only results)
//   - err is non-nil when the walk is reporting a failure rather than an item
type FastWalkCallback func(parentDir string, info os.FileInfo, err error)
   141  
   142  // FastWalkGitRepo is a more optimal implementation of filepath.Walk for a Git
   143  // repo. The callback guaranteed to be called sequentially. The function returns
   144  // once all files and errors have triggered callbacks.
   145  // It differs in the following ways:
   146  //  * Uses goroutines to parallelise large dirs and descent into subdirs
   147  //  * Does not provide sorted output; parents will always be before children but
   148  //    there are no other guarantees. Use parentDir argument in the callback to
   149  //    determine absolute path rather than tracking it yourself
   150  //  * Automatically ignores any .git directories
   151  //  * Respects .gitignore contents and skips ignored files/dirs
   152  //
   153  // rootDir - Absolute path to the top of the repository working directory
   154  func FastWalkGitRepo(rootDir string, cb FastWalkCallback) {
   155  	walker := fastWalkWithExcludeFiles(rootDir, ".gitignore")
   156  	for file := range walker.ch {
   157  		cb(file.ParentDir, file.Info, file.Err)
   158  	}
   159  }
   160  
// fastWalkInfo is a single result produced by the fast walk, carrying its
// parent directory as context.
// This is needed because the walk can provide paths out of order so the
// parent dir cannot be implied.
type fastWalkInfo struct {
	// ParentDir is the directory containing the item; left empty for the
	// root item and for results that only carry an error.
	ParentDir string
	// Info describes the walked file or directory; nil when only Err is set.
	Info os.FileInfo
	// Err is a failure encountered during the walk, if any.
	Err error
}
   169  
// fastWalker holds the shared state of one parallel directory walk started by
// fastWalkWithExcludeFiles.
type fastWalker struct {
	// rootDir is the top directory of the walk.
	rootDir string
	// excludeFilename is the name of the per-directory exclude file that
	// Walk loads in each directory; when empty no such file is loaded.
	excludeFilename string
	// ch receives every walked item and error; it is closed by Wait.
	ch chan fastWalkInfo
	// limit is the value of *cur above which walk runs work inline instead
	// of starting another goroutine.
	limit int32
	// cur counts batches currently being processed via walk; it is only
	// accessed with atomic operations.
	cur *int32
	// wg tracks the goroutines started by walk.
	wg *sync.WaitGroup
}
   178  
   179  // fastWalkWithExcludeFiles walks the contents of a dir, respecting
   180  // include/exclude patterns and also loading new exlude patterns from files
   181  // named excludeFilename in directories walked
   182  //
   183  // rootDir - Absolute path to the top of the repository working directory
   184  func fastWalkWithExcludeFiles(rootDir, excludeFilename string) *fastWalker {
   185  	excludePaths := []filepathfilter.Pattern{
   186  		filepathfilter.NewPattern(".git"),
   187  		filepathfilter.NewPattern("**/.git"),
   188  	}
   189  
   190  	limit, _ := strconv.Atoi(os.Getenv("LFS_FASTWALK_LIMIT"))
   191  	if limit < 1 {
   192  		limit = runtime.GOMAXPROCS(-1) * 20
   193  	}
   194  
   195  	c := int32(0)
   196  	w := &fastWalker{
   197  		rootDir:         rootDir,
   198  		excludeFilename: excludeFilename,
   199  		limit:           int32(limit),
   200  		cur:             &c,
   201  		ch:              make(chan fastWalkInfo, 256),
   202  		wg:              &sync.WaitGroup{},
   203  	}
   204  
   205  	go func() {
   206  		dirFi, err := os.Stat(w.rootDir)
   207  		if err != nil {
   208  			w.ch <- fastWalkInfo{Err: err}
   209  			return
   210  		}
   211  
   212  		w.Walk(true, "", dirFi, excludePaths)
   213  		w.Wait()
   214  	}()
   215  	return w
   216  }
   217  
   218  // Walk is the main recursive implementation of fast walk.
   219  // Sends the file/dir and any contents to the channel so long as it passes the
   220  // include/exclude filter. If a dir, parses any excludeFilename found and updates
   221  // the excludePaths with its content before (parallel) recursing into contents
   222  // Also splits large directories into multiple goroutines.
   223  // Increments waitg.Add(1) for each new goroutine launched internally
   224  //
   225  // workDir - Relative path inside the repository
   226  func (w *fastWalker) Walk(isRoot bool, workDir string, itemFi os.FileInfo,
   227  	excludePaths []filepathfilter.Pattern) {
   228  
   229  	var fullPath string      // Absolute path to the current file or dir
   230  	var parentWorkDir string // Absolute path to the workDir inside the repository
   231  	if isRoot {
   232  		fullPath = w.rootDir
   233  	} else {
   234  		parentWorkDir = join(w.rootDir, workDir)
   235  		fullPath = join(parentWorkDir, itemFi.Name())
   236  	}
   237  
   238  	workPath := join(workDir, itemFi.Name())
   239  	if !filepathfilter.NewFromPatterns(nil, excludePaths).Allows(workPath) {
   240  		return
   241  	}
   242  
   243  	w.ch <- fastWalkInfo{ParentDir: parentWorkDir, Info: itemFi}
   244  
   245  	if !itemFi.IsDir() {
   246  		// Nothing more to do if this is not a dir
   247  		return
   248  	}
   249  
   250  	var childWorkDir string
   251  	if !isRoot {
   252  		childWorkDir = join(workDir, itemFi.Name())
   253  	}
   254  
   255  	if len(w.excludeFilename) > 0 {
   256  		possibleExcludeFile := join(fullPath, w.excludeFilename)
   257  		var err error
   258  		excludePaths, err = loadExcludeFilename(possibleExcludeFile, childWorkDir, excludePaths)
   259  		if err != nil {
   260  			w.ch <- fastWalkInfo{Err: err}
   261  		}
   262  	}
   263  
   264  	// The absolute optimal way to scan would be File.Readdirnames but we
   265  	// still need the Stat() to know whether something is a dir, so use
   266  	// File.Readdir instead. Means we can provide os.FileInfo to callers like
   267  	// filepath.Walk as a bonus.
   268  	df, err := os.Open(fullPath)
   269  	if err != nil {
   270  		w.ch <- fastWalkInfo{Err: err}
   271  		return
   272  	}
   273  
   274  	// The number of items in a dir we process in each goroutine
   275  	jobSize := 100
   276  	for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) {
   277  		// Parallelise all dirs, and chop large dirs into batches
   278  		w.walk(children, func(subitems []os.FileInfo) {
   279  			for _, childFi := range subitems {
   280  				w.Walk(false, childWorkDir, childFi, excludePaths)
   281  			}
   282  		})
   283  	}
   284  
   285  	df.Close()
   286  	if err != nil && err != io.EOF {
   287  		w.ch <- fastWalkInfo{Err: err}
   288  	}
   289  }
   290  
// walk runs fn(children), either on a new goroutine or inline on the caller's
// goroutine. It atomically increments the in-flight counter; if the result
// exceeds w.limit, fn is run synchronously, otherwise a goroutine registered
// on w.wg is started to run it. The counter is decremented again once fn has
// returned.
func (w *fastWalker) walk(children []os.FileInfo, fn func([]os.FileInfo)) {
	cur := atomic.AddInt32(w.cur, 1)
	if cur > w.limit {
		// Over the limit: do the work on this goroutine instead of
		// starting another one.
		fn(children)
		atomic.AddInt32(w.cur, -1)
		return
	}

	// Register with the wait group before the goroutine starts so that
	// Wait cannot miss it.
	w.wg.Add(1)
	go func() {
		fn(children)
		w.wg.Done()
		atomic.AddInt32(w.cur, -1)
	}()
}
   306  
// Wait blocks until every goroutine registered on w.wg has finished, then
// closes w.ch so that receivers ranging over the channel terminate.
// The channel is closed unconditionally, so Wait must be called only once.
func (w *fastWalker) Wait() {
	w.wg.Wait()
	close(w.ch)
}
   311  
   312  // loadExcludeFilename reads the given file in gitignore format and returns a
   313  // revised array of exclude paths if there are any changes.
   314  // If any changes are made a copy of the array is taken so the original is not
   315  // modified
   316  func loadExcludeFilename(filename, workDir string, excludePaths []filepathfilter.Pattern) ([]filepathfilter.Pattern, error) {
   317  	f, err := os.OpenFile(filename, os.O_RDONLY, 0644)
   318  	if err != nil {
   319  		if os.IsNotExist(err) {
   320  			return excludePaths, nil
   321  		}
   322  		return excludePaths, err
   323  	}
   324  	defer f.Close()
   325  
   326  	retPaths := excludePaths
   327  	modified := false
   328  
   329  	scanner := bufio.NewScanner(f)
   330  	for scanner.Scan() {
   331  		line := strings.TrimSpace(scanner.Text())
   332  		// Skip blanks, comments and negations (not supported right now)
   333  		if len(line) == 0 || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "!") {
   334  			continue
   335  		}
   336  
   337  		if !modified {
   338  			// copy on write
   339  			retPaths = make([]filepathfilter.Pattern, len(excludePaths))
   340  			copy(retPaths, excludePaths)
   341  			modified = true
   342  		}
   343  
   344  		path := line
   345  		// Add pattern in context if exclude has separator, or no wildcard
   346  		// Allow for both styles of separator at this point
   347  		if strings.ContainsAny(path, "/\\") ||
   348  			!strings.Contains(path, "*") {
   349  			path = join(workDir, line)
   350  		}
   351  		retPaths = append(retPaths, filepathfilter.NewPattern(path))
   352  	}
   353  
   354  	return retPaths, nil
   355  }
   356  
   357  func join(paths ...string) string {
   358  	ne := make([]string, 0, len(paths))
   359  
   360  	for _, p := range paths {
   361  		if len(p) > 0 {
   362  			ne = append(ne, p)
   363  		}
   364  	}
   365  	return strings.Join(ne, "/")
   366  }
   367  
   368  // SetFileWriteFlag changes write permissions on a file
   369  // Used to make a file read-only or not. When writeEnabled = false, the write
   370  // bit is removed for all roles. When writeEnabled = true, the behaviour is
   371  // different per platform:
   372  // On Mac & Linux, the write bit is set only on the owner as per default umask.
   373  // All other bits are unaffected.
   374  // On Windows, all the write bits are set since Windows doesn't support Unix permissions.
   375  func SetFileWriteFlag(path string, writeEnabled bool) error {
   376  	stat, err := os.Stat(path)
   377  	if err != nil {
   378  		return err
   379  	}
   380  	mode := uint32(stat.Mode())
   381  
   382  	if (writeEnabled && (mode&0200) > 0) ||
   383  		(!writeEnabled && (mode&0222) == 0) {
   384  		// no change needed
   385  		return nil
   386  	}
   387  
   388  	if writeEnabled {
   389  		mode = mode | 0200 // set owner write only
   390  		// Go's own Chmod makes Windows set all though
   391  	} else {
   392  		mode = mode &^ 0222 // disable all write
   393  	}
   394  	return os.Chmod(path, os.FileMode(mode))
   395  }