github.com/git-lfs/git-lfs@v2.5.2+incompatible/commands/command_prune.go (about)

     1  package commands
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"os"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/git-lfs/git-lfs/filepathfilter"
    11  	"github.com/git-lfs/git-lfs/fs"
    12  	"github.com/git-lfs/git-lfs/git"
    13  	"github.com/git-lfs/git-lfs/lfs"
    14  	"github.com/git-lfs/git-lfs/tasklog"
    15  	"github.com/git-lfs/git-lfs/tools"
    16  	"github.com/git-lfs/git-lfs/tools/humanize"
    17  	"github.com/git-lfs/git-lfs/tq"
    18  	"github.com/rubyist/tracerx"
    19  	"github.com/spf13/cobra"
    20  )
    21  
// Command-line flag values for the "prune" command. They are bound to their
// flags in init() and read by pruneCommand.
var (
	pruneDryRunArg      bool // --dry-run / -d
	pruneVerboseArg     bool // --verbose / -v
	pruneVerifyArg      bool // --verify-remote / -c
	pruneDoNotVerifyArg bool // --no-verify-remote
)
    28  
    29  func pruneCommand(cmd *cobra.Command, args []string) {
    30  	// Guts of this must be re-usable from fetch --prune so just parse & dispatch
    31  	if pruneVerifyArg && pruneDoNotVerifyArg {
    32  		Exit("Cannot specify both --verify-remote and --no-verify-remote")
    33  	}
    34  
    35  	fetchPruneConfig := lfs.NewFetchPruneConfig(cfg.Git)
    36  	verify := !pruneDoNotVerifyArg &&
    37  		(fetchPruneConfig.PruneVerifyRemoteAlways || pruneVerifyArg)
    38  	prune(fetchPruneConfig, verify, pruneDryRunArg, pruneVerboseArg)
    39  }
    40  
    41  type PruneProgressType int
    42  
    43  const (
    44  	PruneProgressTypeLocal  = PruneProgressType(iota)
    45  	PruneProgressTypeRetain = PruneProgressType(iota)
    46  	PruneProgressTypeVerify = PruneProgressType(iota)
    47  )
    48  
// PruneProgress is a progress update sent by a sub-task of prune.
type PruneProgress struct {
	ProgressType PruneProgressType // Which sub-task is reporting
	Count        int               // Number of items done
}

// PruneProgressChan carries PruneProgress updates from the prune sub-tasks to
// the progress display.
type PruneProgressChan chan PruneProgress
    55  
// prune works out which local LFS objects are not retained by any of the
// scanned sources and deletes them, or only reports them when dryRun is set.
//
// The local object list and the retained-object scans run concurrently as
// background tasks; their results are merged once all of them have finished.
// Any error reported by a task stops the prune via pruneCheckErrors before
// anything is deleted.
//
// fetchPruneConfig supplies the retention settings and the remote name.
// When verifyRemote is true each prunable object is additionally queued on a
// download check queue for that remote, and pruneCheckVerified decides whether
// the prune may proceed. verbose records "<oid> (<size>)" for each prunable
// object and prints the list at the end.
func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose bool) {
	localObjects := make([]fs.Object, 0, 100)
	retainedObjects := tools.NewStringSetWithCapacity(100)

	logger := tasklog.NewLogger(OutputWriter)
	defer logger.Close()

	// Only populated when verifyRemote is set
	var reachableObjects tools.StringSet
	var taskwait sync.WaitGroup

	// Add all the base funcs to the waitgroup before starting them, in case
	// one completes really fast & hits 0 unexpectedly
	// each main process can Add() to the wg itself if it subdivides the task
	taskwait.Add(4) // 1..4: localObjects, current & recent refs, unpushed, worktree
	if verifyRemote {
		taskwait.Add(1) // 5
	}

	progressChan := make(PruneProgressChan, 100)

	// Collect errors
	errorChan := make(chan error, 10)
	var errorwait sync.WaitGroup
	errorwait.Add(1)
	var taskErrors []error
	go pruneTaskCollectErrors(&taskErrors, errorChan, &errorwait)

	// Populate the single list of local objects
	go pruneTaskGetLocalObjects(&localObjects, progressChan, &taskwait)

	// Now find files to be retained from many sources
	retainChan := make(chan string, 100)

	// One scanner is shared by all of the scanning tasks below
	gitscanner := lfs.NewGitScanner(nil)
	gitscanner.Filter = filepathfilter.New(nil, cfg.FetchExcludePaths())

	go pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait)
	go pruneTaskGetRetainedUnpushed(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait)
	go pruneTaskGetRetainedWorktree(gitscanner, retainChan, errorChan, &taskwait)
	if verifyRemote {
		reachableObjects = tools.NewStringSetWithCapacity(100)
		go pruneTaskGetReachableObjects(gitscanner, &reachableObjects, errorChan, &taskwait)
	}

	// Now collect all the retained objects, on separate wait
	var retainwait sync.WaitGroup
	retainwait.Add(1)
	go pruneTaskCollectRetained(&retainedObjects, retainChan, progressChan, &retainwait)

	// Report progress
	var progresswait sync.WaitGroup
	progresswait.Add(1)
	go pruneTaskDisplayProgress(progressChan, &progresswait, logger)

	taskwait.Wait() // wait for subtasks
	gitscanner.Close()
	close(retainChan) // triggers retain collector to end now that all tasks have finished
	retainwait.Wait() // make sure all retained objects added

	close(errorChan) // triggers error collector to end now that all tasks have finished
	errorwait.Wait() // make sure all errors have been processed
	pruneCheckErrors(taskErrors)

	prunableObjects := make([]string, 0, len(localObjects)/2)

	// Build list of prunables (also queue for verify at same time if applicable)
	var verifyQueue *tq.TransferQueue
	var verifiedObjects tools.StringSet
	var totalSize int64
	var verboseOutput []string
	var verifyc chan *tq.Transfer
	var verifywait sync.WaitGroup

	if verifyRemote {
		verifyQueue = newDownloadCheckQueue(
			getTransferManifestOperationRemote("download", fetchPruneConfig.PruneRemoteName),
			fetchPruneConfig.PruneRemoteName,
		)
		verifiedObjects = tools.NewStringSetWithCapacity(len(localObjects) / 2)

		// this channel is filled with oids for which Check() succeeded & Transfer() was called
		verifyc = verifyQueue.Watch()
		verifywait.Add(1)
		// Record every verified oid and report it as progress until the
		// watch channel is closed
		go func() {
			for t := range verifyc {
				verifiedObjects.Add(t.Oid)
				tracerx.Printf("VERIFIED: %v", t.Oid)
				progressChan <- PruneProgress{PruneProgressTypeVerify, 1}
			}
			verifywait.Done()
		}()
	}

	// Anything local that nobody asked to retain is prunable
	for _, file := range localObjects {
		if !retainedObjects.Contains(file.Oid) {
			prunableObjects = append(prunableObjects, file.Oid)
			totalSize += file.Size
			if verbose {
				// Save up verbose output for the end.
				verboseOutput = append(verboseOutput,
					fmt.Sprintf("%s (%s)",
						file.Oid,
						humanize.FormatBytes(uint64(file.Size))))
			}

			if verifyRemote {
				tracerx.Printf("VERIFYING: %v", file.Oid)

				verifyQueue.Add(downloadTransfer(&lfs.WrappedPointer{
					Pointer: lfs.NewPointer(file.Oid, file.Size, nil),
				}))
			}
		}
	}

	// progressChan may only be closed once the last sender (the verify
	// goroutine, when present) has finished
	if verifyRemote {
		verifyQueue.Wait()
		verifywait.Wait()
		close(progressChan) // after verify but before check
		progresswait.Wait()
		pruneCheckVerified(prunableObjects, reachableObjects, verifiedObjects)
	} else {
		close(progressChan)
		progresswait.Wait()
	}

	if len(prunableObjects) == 0 {
		return
	}

	info := tasklog.NewSimpleTask()
	logger.Enqueue(info)
	if dryRun {
		// Dry run: report the totals and (if verbose) each object, delete nothing
		info.Logf("prune: %d file(s) would be pruned (%s)", len(prunableObjects), humanize.FormatBytes(uint64(totalSize)))
		for _, item := range verboseOutput {
			info.Logf("\n * %s", item)
		}
		info.Complete()
	} else {
		// Real run: print the verbose list (empty unless verbose) and then delete
		for _, item := range verboseOutput {
			info.Logf("\n%s", item)
		}
		info.Complete()

		pruneDeleteFiles(prunableObjects, logger)
	}
}
   203  
   204  func pruneCheckVerified(prunableObjects []string, reachableObjects, verifiedObjects tools.StringSet) {
   205  	// There's no issue if an object is not reachable and missing, only if reachable & missing
   206  	var problems bytes.Buffer
   207  	for _, oid := range prunableObjects {
   208  		// Test verified first as most likely reachable
   209  		if !verifiedObjects.Contains(oid) {
   210  			if reachableObjects.Contains(oid) {
   211  				problems.WriteString(fmt.Sprintf(" * %v\n", oid))
   212  			} else {
   213  				// Just to indicate why it doesn't matter that we didn't verify
   214  				tracerx.Printf("UNREACHABLE: %v", oid)
   215  			}
   216  		}
   217  	}
   218  	// technically we could still prune the other oids, but this indicates a
   219  	// more serious issue because the local state implies that these can be
   220  	// deleted but that's incorrect; bad state has occurred somehow, might need
   221  	// push --all to resolve
   222  	if problems.Len() > 0 {
   223  		Exit("Abort: these objects to be pruned are missing on remote:\n%v", problems.String())
   224  	}
   225  }
   226  
   227  func pruneCheckErrors(taskErrors []error) {
   228  	if len(taskErrors) > 0 {
   229  		for _, err := range taskErrors {
   230  			LoggedError(err, "Prune error: %v", err)
   231  		}
   232  		Exit("Prune sub-tasks failed, cannot continue")
   233  	}
   234  }
   235  
   236  func pruneTaskDisplayProgress(progressChan PruneProgressChan, waitg *sync.WaitGroup, logger *tasklog.Logger) {
   237  	defer waitg.Done()
   238  
   239  	task := tasklog.NewSimpleTask()
   240  	defer task.Complete()
   241  
   242  	logger.Enqueue(task)
   243  
   244  	localCount := 0
   245  	retainCount := 0
   246  	verifyCount := 0
   247  	var msg string
   248  	for p := range progressChan {
   249  		switch p.ProgressType {
   250  		case PruneProgressTypeLocal:
   251  			localCount++
   252  		case PruneProgressTypeRetain:
   253  			retainCount++
   254  		case PruneProgressTypeVerify:
   255  			verifyCount++
   256  		}
   257  		msg = fmt.Sprintf("prune: %d local object(s), %d retained", localCount, retainCount)
   258  		if verifyCount > 0 {
   259  			msg += fmt.Sprintf(", %d verified with remote", verifyCount)
   260  		}
   261  		task.Log(msg)
   262  	}
   263  }
   264  
   265  func pruneTaskCollectRetained(outRetainedObjects *tools.StringSet, retainChan chan string,
   266  	progressChan PruneProgressChan, retainwait *sync.WaitGroup) {
   267  
   268  	defer retainwait.Done()
   269  
   270  	for oid := range retainChan {
   271  		if outRetainedObjects.Add(oid) {
   272  			progressChan <- PruneProgress{PruneProgressTypeRetain, 1}
   273  		}
   274  	}
   275  
   276  }
   277  
   278  func pruneTaskCollectErrors(outtaskErrors *[]error, errorChan chan error, errorwait *sync.WaitGroup) {
   279  	defer errorwait.Done()
   280  
   281  	for err := range errorChan {
   282  		*outtaskErrors = append(*outtaskErrors, err)
   283  	}
   284  }
   285  
   286  func pruneDeleteFiles(prunableObjects []string, logger *tasklog.Logger) {
   287  	task := logger.Percentage("prune: Deleting objects", uint64(len(prunableObjects)))
   288  
   289  	var problems bytes.Buffer
   290  	// In case we fail to delete some
   291  	var deletedFiles int
   292  	for _, oid := range prunableObjects {
   293  		mediaFile, err := cfg.Filesystem().ObjectPath(oid)
   294  		if err != nil {
   295  			problems.WriteString(fmt.Sprintf("Unable to find media path for %v: %v\n", oid, err))
   296  			continue
   297  		}
   298  		err = os.Remove(mediaFile)
   299  		if err != nil {
   300  			problems.WriteString(fmt.Sprintf("Failed to remove file %v: %v\n", mediaFile, err))
   301  			continue
   302  		}
   303  		deletedFiles++
   304  		task.Count(1)
   305  	}
   306  	if problems.Len() > 0 {
   307  		LoggedError(fmt.Errorf("Failed to delete some files"), problems.String())
   308  		Exit("Prune failed, see errors above")
   309  	}
   310  }
   311  
   312  // Background task, must call waitg.Done() once at end
   313  func pruneTaskGetLocalObjects(outLocalObjects *[]fs.Object, progChan PruneProgressChan, waitg *sync.WaitGroup) {
   314  	defer waitg.Done()
   315  
   316  	cfg.EachLFSObject(func(obj fs.Object) error {
   317  		*outLocalObjects = append(*outLocalObjects, obj)
   318  		progChan <- PruneProgress{PruneProgressTypeLocal, 1}
   319  		return nil
   320  	})
   321  }
   322  
   323  // Background task, must call waitg.Done() once at end
   324  func pruneTaskGetRetainedAtRef(gitscanner *lfs.GitScanner, ref string, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
   325  	defer waitg.Done()
   326  
   327  	err := gitscanner.ScanRef(ref, func(p *lfs.WrappedPointer, err error) {
   328  		if err != nil {
   329  			errorChan <- err
   330  			return
   331  		}
   332  
   333  		retainChan <- p.Oid
   334  		tracerx.Printf("RETAIN: %v via ref %v", p.Oid, ref)
   335  	})
   336  
   337  	if err != nil {
   338  		errorChan <- err
   339  	}
   340  }
   341  
   342  // Background task, must call waitg.Done() once at end
   343  func pruneTaskGetPreviousVersionsOfRef(gitscanner *lfs.GitScanner, ref string, since time.Time, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
   344  	defer waitg.Done()
   345  
   346  	err := gitscanner.ScanPreviousVersions(ref, since, func(p *lfs.WrappedPointer, err error) {
   347  		if err != nil {
   348  			errorChan <- err
   349  			return
   350  		}
   351  
   352  		retainChan <- p.Oid
   353  		tracerx.Printf("RETAIN: %v via ref %v >= %v", p.Oid, ref, since)
   354  	})
   355  
   356  	if err != nil {
   357  		errorChan <- err
   358  		return
   359  	}
   360  }
   361  
   362  // Background task, must call waitg.Done() once at end
   363  func pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner *lfs.GitScanner, fetchconf lfs.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
   364  	defer waitg.Done()
   365  
   366  	// We actually increment the waitg in this func since we kick off sub-goroutines
   367  	// Make a list of what unique commits to keep, & search backward from
   368  	commits := tools.NewStringSet()
   369  	// Do current first
   370  	ref, err := git.CurrentRef()
   371  	if err != nil {
   372  		errorChan <- err
   373  		return
   374  	}
   375  	commits.Add(ref.Sha)
   376  	waitg.Add(1)
   377  	go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg)
   378  
   379  	// Now recent
   380  	if fetchconf.FetchRecentRefsDays > 0 {
   381  		pruneRefDays := fetchconf.FetchRecentRefsDays + fetchconf.PruneOffsetDays
   382  		tracerx.Printf("PRUNE: Retaining non-HEAD refs within %d (%d+%d) days", pruneRefDays, fetchconf.FetchRecentRefsDays, fetchconf.PruneOffsetDays)
   383  		refsSince := time.Now().AddDate(0, 0, -pruneRefDays)
   384  		// Keep all recent refs including any recent remote branches
   385  		refs, err := git.RecentBranches(refsSince, fetchconf.FetchRecentRefsIncludeRemotes, "")
   386  		if err != nil {
   387  			Panic(err, "Could not scan for recent refs")
   388  		}
   389  		for _, ref := range refs {
   390  			if commits.Add(ref.Sha) {
   391  				// A new commit
   392  				waitg.Add(1)
   393  				go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg)
   394  			}
   395  		}
   396  	}
   397  
   398  	// For every unique commit we've fetched, check recent commits too
   399  	// Only if we're fetching recent commits, otherwise only keep at refs
   400  	if fetchconf.FetchRecentCommitsDays > 0 {
   401  		pruneCommitDays := fetchconf.FetchRecentCommitsDays + fetchconf.PruneOffsetDays
   402  		for commit := range commits.Iter() {
   403  			// We measure from the last commit at the ref
   404  			summ, err := git.GetCommitSummary(commit)
   405  			if err != nil {
   406  				errorChan <- fmt.Errorf("Couldn't scan commits at %v: %v", commit, err)
   407  				continue
   408  			}
   409  			commitsSince := summ.CommitDate.AddDate(0, 0, -pruneCommitDays)
   410  			waitg.Add(1)
   411  			go pruneTaskGetPreviousVersionsOfRef(gitscanner, commit, commitsSince, retainChan, errorChan, waitg)
   412  		}
   413  	}
   414  }
   415  
   416  // Background task, must call waitg.Done() once at end
   417  func pruneTaskGetRetainedUnpushed(gitscanner *lfs.GitScanner, fetchconf lfs.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
   418  	defer waitg.Done()
   419  
   420  	err := gitscanner.ScanUnpushed(fetchconf.PruneRemoteName, func(p *lfs.WrappedPointer, err error) {
   421  		if err != nil {
   422  			errorChan <- err
   423  		} else {
   424  			retainChan <- p.Pointer.Oid
   425  			tracerx.Printf("RETAIN: %v unpushed", p.Pointer.Oid)
   426  		}
   427  	})
   428  
   429  	if err != nil {
   430  		errorChan <- err
   431  		return
   432  	}
   433  }
   434  
   435  // Background task, must call waitg.Done() once at end
   436  func pruneTaskGetRetainedWorktree(gitscanner *lfs.GitScanner, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
   437  	defer waitg.Done()
   438  
   439  	// Retain other worktree HEADs too
   440  	// Working copy, branch & maybe commit is different but repo is shared
   441  	allWorktreeRefs, err := git.GetAllWorkTreeHEADs(cfg.LocalGitStorageDir())
   442  	if err != nil {
   443  		errorChan <- err
   444  		return
   445  	}
   446  	// Don't repeat any commits, worktrees are always on their own branches but
   447  	// may point to the same commit
   448  	commits := tools.NewStringSet()
   449  	// current HEAD is done elsewhere
   450  	headref, err := git.CurrentRef()
   451  	if err != nil {
   452  		errorChan <- err
   453  		return
   454  	}
   455  	commits.Add(headref.Sha)
   456  	for _, ref := range allWorktreeRefs {
   457  		if commits.Add(ref.Sha) {
   458  			// Worktree is on a different commit
   459  			waitg.Add(1)
   460  			// Don't need to 'cd' to worktree since we share same repo
   461  			go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg)
   462  		}
   463  	}
   464  }
   465  
   466  // Background task, must call waitg.Done() once at end
   467  func pruneTaskGetReachableObjects(gitscanner *lfs.GitScanner, outObjectSet *tools.StringSet, errorChan chan error, waitg *sync.WaitGroup) {
   468  	defer waitg.Done()
   469  
   470  	err := gitscanner.ScanAll(func(p *lfs.WrappedPointer, err error) {
   471  		if err != nil {
   472  			errorChan <- err
   473  			return
   474  		}
   475  		outObjectSet.Add(p.Oid)
   476  	})
   477  
   478  	if err != nil {
   479  		errorChan <- err
   480  	}
   481  }
   482  
   483  func init() {
   484  	RegisterCommand("prune", pruneCommand, func(cmd *cobra.Command) {
   485  		cmd.Flags().BoolVarP(&pruneDryRunArg, "dry-run", "d", false, "Don't delete anything, just report")
   486  		cmd.Flags().BoolVarP(&pruneVerboseArg, "verbose", "v", false, "Print full details of what is/would be deleted")
   487  		cmd.Flags().BoolVarP(&pruneVerifyArg, "verify-remote", "c", false, "Verify that remote has LFS files before deleting")
   488  		cmd.Flags().BoolVar(&pruneDoNotVerifyArg, "no-verify-remote", false, "Override lfs.pruneverifyremotealways and don't verify")
   489  	})
   490  }