github.com/stffabi/git-lfs@v2.3.5-0.20180214015214-8eeaa8d88902+incompatible/commands/command_prune.go (about) 1 package commands 2 3 import ( 4 "bytes" 5 "fmt" 6 "os" 7 "sync" 8 "time" 9 10 "github.com/git-lfs/git-lfs/filepathfilter" 11 "github.com/git-lfs/git-lfs/fs" 12 "github.com/git-lfs/git-lfs/git" 13 "github.com/git-lfs/git-lfs/lfs" 14 "github.com/git-lfs/git-lfs/tasklog" 15 "github.com/git-lfs/git-lfs/tools" 16 "github.com/git-lfs/git-lfs/tools/humanize" 17 "github.com/git-lfs/git-lfs/tq" 18 "github.com/rubyist/tracerx" 19 "github.com/spf13/cobra" 20 ) 21 22 var ( 23 pruneDryRunArg bool 24 pruneVerboseArg bool 25 pruneVerifyArg bool 26 pruneDoNotVerifyArg bool 27 ) 28 29 func pruneCommand(cmd *cobra.Command, args []string) { 30 // Guts of this must be re-usable from fetch --prune so just parse & dispatch 31 if pruneVerifyArg && pruneDoNotVerifyArg { 32 Exit("Cannot specify both --verify-remote and --no-verify-remote") 33 } 34 35 fetchPruneConfig := lfs.NewFetchPruneConfig(cfg.Git) 36 verify := !pruneDoNotVerifyArg && 37 (fetchPruneConfig.PruneVerifyRemoteAlways || pruneVerifyArg) 38 prune(fetchPruneConfig, verify, pruneDryRunArg, pruneVerboseArg) 39 } 40 41 type PruneProgressType int 42 43 const ( 44 PruneProgressTypeLocal = PruneProgressType(iota) 45 PruneProgressTypeRetain = PruneProgressType(iota) 46 PruneProgressTypeVerify = PruneProgressType(iota) 47 ) 48 49 // Progress from a sub-task of prune 50 type PruneProgress struct { 51 ProgressType PruneProgressType 52 Count int // Number of items done 53 } 54 type PruneProgressChan chan PruneProgress 55 56 func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose bool) { 57 localObjects := make([]fs.Object, 0, 100) 58 retainedObjects := tools.NewStringSetWithCapacity(100) 59 60 logger := tasklog.NewLogger(OutputWriter) 61 defer logger.Close() 62 63 var reachableObjects tools.StringSet 64 var taskwait sync.WaitGroup 65 66 // Add all the base funcs to the waitgroup before starting them, in case 67 // one completes really fast & hits 0 unexpectedly 68 // each main process can Add() to the wg itself if it subdivides the task 69 taskwait.Add(4) // 1..4: localObjects, current & recent refs, unpushed, worktree 70 if verifyRemote { 71 taskwait.Add(1) // 5 72 } 73 74 progressChan := make(PruneProgressChan, 100) 75 76 // Collect errors 77 errorChan := make(chan error, 10) 78 var errorwait sync.WaitGroup 79 errorwait.Add(1) 80 var taskErrors []error 81 go pruneTaskCollectErrors(&taskErrors, errorChan, &errorwait) 82 83 // Populate the single list of local objects 84 go pruneTaskGetLocalObjects(&localObjects, progressChan, &taskwait) 85 86 // Now find files to be retained from many sources 87 retainChan := make(chan string, 100) 88 89 gitscanner := lfs.NewGitScanner(nil) 90 gitscanner.Filter = filepathfilter.New(nil, cfg.FetchExcludePaths()) 91 92 go pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait) 93 go pruneTaskGetRetainedUnpushed(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait) 94 go pruneTaskGetRetainedWorktree(gitscanner, retainChan, errorChan, &taskwait) 95 if verifyRemote { 96 reachableObjects = tools.NewStringSetWithCapacity(100) 97 go pruneTaskGetReachableObjects(gitscanner, &reachableObjects, errorChan, &taskwait) 98 } 99 100 // Now collect all the retained objects, on separate wait 101 var retainwait sync.WaitGroup 102 retainwait.Add(1) 103 go pruneTaskCollectRetained(&retainedObjects, retainChan, progressChan, &retainwait) 104 105 // Report progress 106 var progresswait sync.WaitGroup 107 progresswait.Add(1) 108 go pruneTaskDisplayProgress(progressChan, &progresswait, logger) 109 110 taskwait.Wait() // wait for subtasks 111 gitscanner.Close() 112 close(retainChan) // triggers retain collector to end now all tasks have 113 retainwait.Wait() // make sure all retained objects added 114 115 close(errorChan) // triggers error collector to end now all tasks have 116 errorwait.Wait() // make sure all errors have been processed 117 pruneCheckErrors(taskErrors) 118 119 prunableObjects := make([]string, 0, len(localObjects)/2) 120 121 // Build list of prunables (also queue for verify at same time if applicable) 122 var verifyQueue *tq.TransferQueue 123 var verifiedObjects tools.StringSet 124 var totalSize int64 125 var verboseOutput []string 126 var verifyc chan *tq.Transfer 127 var verifywait sync.WaitGroup 128 129 if verifyRemote { 130 verifyQueue = newDownloadCheckQueue( 131 getTransferManifestOperationRemote("download", fetchPruneConfig.PruneRemoteName), 132 fetchPruneConfig.PruneRemoteName, 133 ) 134 verifiedObjects = tools.NewStringSetWithCapacity(len(localObjects) / 2) 135 136 // this channel is filled with oids for which Check() succeeded & Transfer() was called 137 verifyc = verifyQueue.Watch() 138 verifywait.Add(1) 139 go func() { 140 for t := range verifyc { 141 verifiedObjects.Add(t.Oid) 142 tracerx.Printf("VERIFIED: %v", t.Oid) 143 progressChan <- PruneProgress{PruneProgressTypeVerify, 1} 144 } 145 verifywait.Done() 146 }() 147 } 148 149 for _, file := range localObjects { 150 if !retainedObjects.Contains(file.Oid) { 151 prunableObjects = append(prunableObjects, file.Oid) 152 totalSize += file.Size 153 if verbose { 154 // Save up verbose output for the end. 155 verboseOutput = append(verboseOutput, 156 fmt.Sprintf("%s (%s)", 157 file.Oid, 158 humanize.FormatBytes(uint64(file.Size)))) 159 } 160 161 if verifyRemote { 162 tracerx.Printf("VERIFYING: %v", file.Oid) 163 164 verifyQueue.Add(downloadTransfer(&lfs.WrappedPointer{ 165 Pointer: lfs.NewPointer(file.Oid, file.Size, nil), 166 })) 167 } 168 } 169 } 170 171 if verifyRemote { 172 verifyQueue.Wait() 173 verifywait.Wait() 174 close(progressChan) // after verify but before check 175 progresswait.Wait() 176 pruneCheckVerified(prunableObjects, reachableObjects, verifiedObjects) 177 } else { 178 close(progressChan) 179 progresswait.Wait() 180 } 181 182 if len(prunableObjects) == 0 { 183 return 184 } 185 186 info := tasklog.NewSimpleTask() 187 logger.Enqueue(info) 188 if dryRun { 189 info.Logf("prune: %d file(s) would be pruned (%s)", len(prunableObjects), humanize.FormatBytes(uint64(totalSize))) 190 for _, item := range verboseOutput { 191 info.Logf("\n * %s", item) 192 } 193 info.Complete() 194 } else { 195 for _, item := range verboseOutput { 196 info.Logf("\n%s", item) 197 } 198 info.Complete() 199 200 pruneDeleteFiles(prunableObjects, logger) 201 } 202 } 203 204 func pruneCheckVerified(prunableObjects []string, reachableObjects, verifiedObjects tools.StringSet) { 205 // There's no issue if an object is not reachable and missing, only if reachable & missing 206 var problems bytes.Buffer 207 for _, oid := range prunableObjects { 208 // Test verified first as most likely reachable 209 if !verifiedObjects.Contains(oid) { 210 if reachableObjects.Contains(oid) { 211 problems.WriteString(fmt.Sprintf(" * %v\n", oid)) 212 } else { 213 // Just to indicate why it doesn't matter that we didn't verify 214 tracerx.Printf("UNREACHABLE: %v", oid) 215 } 216 } 217 } 218 // technically we could still prune the other oids, but this indicates a 219 // more serious issue because the local state implies that these can be 220 // deleted but that's incorrect; bad state has occurred somehow, might need 221 // push --all to resolve 222 if problems.Len() > 0 { 223 Exit("Abort: these objects to be pruned are missing on remote:\n%v", problems.String()) 224 } 225 } 226 227 func pruneCheckErrors(taskErrors []error) { 228 if len(taskErrors) > 0 { 229 for _, err := range taskErrors { 230 LoggedError(err, "Prune error: %v", err) 231 } 232 Exit("Prune sub-tasks failed, cannot continue") 233 } 234 } 235 236 func pruneTaskDisplayProgress(progressChan PruneProgressChan, waitg *sync.WaitGroup, logger *tasklog.Logger) { 237 defer waitg.Done() 238 239 task := tasklog.NewSimpleTask() 240 defer task.Complete() 241 242 logger.Enqueue(task) 243 244 localCount := 0 245 retainCount := 0 246 verifyCount := 0 247 var msg string 248 for p := range progressChan { 249 switch p.ProgressType { 250 case PruneProgressTypeLocal: 251 localCount++ 252 case PruneProgressTypeRetain: 253 retainCount++ 254 case PruneProgressTypeVerify: 255 verifyCount++ 256 } 257 msg = fmt.Sprintf("prune: %d local object(s), %d retained", localCount, retainCount) 258 if verifyCount > 0 { 259 msg += fmt.Sprintf(", %d verified with remote", verifyCount) 260 } 261 task.Log(msg) 262 } 263 } 264 265 func pruneTaskCollectRetained(outRetainedObjects *tools.StringSet, retainChan chan string, 266 progressChan PruneProgressChan, retainwait *sync.WaitGroup) { 267 268 defer retainwait.Done() 269 270 for oid := range retainChan { 271 if outRetainedObjects.Add(oid) { 272 progressChan <- PruneProgress{PruneProgressTypeRetain, 1} 273 } 274 } 275 276 } 277 278 func pruneTaskCollectErrors(outtaskErrors *[]error, errorChan chan error, errorwait *sync.WaitGroup) { 279 defer errorwait.Done() 280 281 for err := range errorChan { 282 *outtaskErrors = append(*outtaskErrors, err) 283 } 284 } 285 286 func pruneDeleteFiles(prunableObjects []string, logger *tasklog.Logger) { 287 task := logger.Percentage("prune: Deleting objects", uint64(len(prunableObjects))) 288 289 var problems bytes.Buffer 290 // In case we fail to delete some 291 var deletedFiles int 292 for _, oid := range prunableObjects { 293 mediaFile, err := cfg.Filesystem().ObjectPath(oid) 294 if err != nil { 295 problems.WriteString(fmt.Sprintf("Unable to find media path for %v: %v\n", oid, err)) 296 continue 297 } 298 err = os.Remove(mediaFile) 299 if err != nil { 300 problems.WriteString(fmt.Sprintf("Failed to remove file %v: %v\n", mediaFile, err)) 301 continue 302 } 303 deletedFiles++ 304 task.Count(1) 305 } 306 if problems.Len() > 0 { 307 LoggedError(fmt.Errorf("Failed to delete some files"), problems.String()) 308 Exit("Prune failed, see errors above") 309 } 310 } 311 312 // Background task, must call waitg.Done() once at end 313 func pruneTaskGetLocalObjects(outLocalObjects *[]fs.Object, progChan PruneProgressChan, waitg *sync.WaitGroup) { 314 defer waitg.Done() 315 316 cfg.EachLFSObject(func(obj fs.Object) error { 317 *outLocalObjects = append(*outLocalObjects, obj) 318 progChan <- PruneProgress{PruneProgressTypeLocal, 1} 319 return nil 320 }) 321 } 322 323 // Background task, must call waitg.Done() once at end 324 func pruneTaskGetRetainedAtRef(gitscanner *lfs.GitScanner, ref string, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) { 325 defer waitg.Done() 326 327 err := gitscanner.ScanRef(ref, func(p *lfs.WrappedPointer, err error) { 328 if err != nil { 329 errorChan <- err 330 return 331 } 332 333 retainChan <- p.Oid 334 tracerx.Printf("RETAIN: %v via ref %v", p.Oid, ref) 335 }) 336 337 if err != nil { 338 errorChan <- err 339 } 340 } 341 342 // Background task, must call waitg.Done() once at end 343 func pruneTaskGetPreviousVersionsOfRef(gitscanner *lfs.GitScanner, ref string, since time.Time, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) { 344 defer waitg.Done() 345 346 err := gitscanner.ScanPreviousVersions(ref, since, func(p *lfs.WrappedPointer, err error) { 347 if err != nil { 348 errorChan <- err 349 return 350 } 351 352 retainChan <- p.Oid 353 tracerx.Printf("RETAIN: %v via ref %v >= %v", p.Oid, ref, since) 354 }) 355 356 if err != nil { 357 errorChan <- err 358 return 359 } 360 } 361 362 // Background task, must call waitg.Done() once at end 363 func pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner *lfs.GitScanner, fetchconf lfs.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) { 364 defer waitg.Done() 365 366 // We actually increment the waitg in this func since we kick off sub-goroutines 367 // Make a list of what unique commits to keep, & search backward from 368 commits := tools.NewStringSet() 369 // Do current first 370 ref, err := git.CurrentRef() 371 if err != nil { 372 errorChan <- err 373 return 374 } 375 commits.Add(ref.Sha) 376 waitg.Add(1) 377 go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg) 378 379 // Now recent 380 if fetchconf.FetchRecentRefsDays > 0 { 381 pruneRefDays := fetchconf.FetchRecentRefsDays + fetchconf.PruneOffsetDays 382 tracerx.Printf("PRUNE: Retaining non-HEAD refs within %d (%d+%d) days", pruneRefDays, fetchconf.FetchRecentRefsDays, fetchconf.PruneOffsetDays) 383 refsSince := time.Now().AddDate(0, 0, -pruneRefDays) 384 // Keep all recent refs including any recent remote branches 385 refs, err := git.RecentBranches(refsSince, fetchconf.FetchRecentRefsIncludeRemotes, "") 386 if err != nil { 387 Panic(err, "Could not scan for recent refs") 388 } 389 for _, ref := range refs { 390 if commits.Add(ref.Sha) { 391 // A new commit 392 waitg.Add(1) 393 go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg) 394 } 395 } 396 } 397 398 // For every unique commit we've fetched, check recent commits too 399 // Only if we're fetching recent commits, otherwise only keep at refs 400 if fetchconf.FetchRecentCommitsDays > 0 { 401 pruneCommitDays := fetchconf.FetchRecentCommitsDays + fetchconf.PruneOffsetDays 402 for commit := range commits.Iter() { 403 // We measure from the last commit at the ref 404 summ, err := git.GetCommitSummary(commit) 405 if err != nil { 406 errorChan <- fmt.Errorf("Couldn't scan commits at %v: %v", commit, err) 407 continue 408 } 409 commitsSince := summ.CommitDate.AddDate(0, 0, -pruneCommitDays) 410 waitg.Add(1) 411 go pruneTaskGetPreviousVersionsOfRef(gitscanner, commit, commitsSince, retainChan, errorChan, waitg) 412 } 413 } 414 } 415 416 // Background task, must call waitg.Done() once at end 417 func pruneTaskGetRetainedUnpushed(gitscanner *lfs.GitScanner, fetchconf lfs.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) { 418 defer waitg.Done() 419 420 err := gitscanner.ScanUnpushed(fetchconf.PruneRemoteName, func(p *lfs.WrappedPointer, err error) { 421 if err != nil { 422 errorChan <- err 423 } else { 424 retainChan <- p.Pointer.Oid 425 tracerx.Printf("RETAIN: %v unpushed", p.Pointer.Oid) 426 } 427 }) 428 429 if err != nil { 430 errorChan <- err 431 return 432 } 433 } 434 435 // Background task, must call waitg.Done() once at end 436 func pruneTaskGetRetainedWorktree(gitscanner *lfs.GitScanner, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) { 437 defer waitg.Done() 438 439 // Retain other worktree HEADs too 440 // Working copy, branch & maybe commit is different but repo is shared 441 allWorktreeRefs, err := git.GetAllWorkTreeHEADs(cfg.LocalGitStorageDir()) 442 if err != nil { 443 errorChan <- err 444 return 445 } 446 // Don't repeat any commits, worktrees are always on their own branches but 447 // may point to the same commit 448 commits := tools.NewStringSet() 449 // current HEAD is done elsewhere 450 headref, err := git.CurrentRef() 451 if err != nil { 452 errorChan <- err 453 return 454 } 455 commits.Add(headref.Sha) 456 for _, ref := range allWorktreeRefs { 457 if commits.Add(ref.Sha) { 458 // Worktree is on a different commit 459 waitg.Add(1) 460 // Don't need to 'cd' to worktree since we share same repo 461 go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg) 462 } 463 } 464 } 465 466 // Background task, must call waitg.Done() once at end 467 func pruneTaskGetReachableObjects(gitscanner *lfs.GitScanner, outObjectSet *tools.StringSet, errorChan chan error, waitg *sync.WaitGroup) { 468 defer waitg.Done() 469 470 err := gitscanner.ScanAll(func(p *lfs.WrappedPointer, err error) { 471 if err != nil { 472 errorChan <- err 473 return 474 } 475 outObjectSet.Add(p.Oid) 476 }) 477 478 if err != nil { 479 errorChan <- err 480 } 481 } 482 483 func init() { 484 RegisterCommand("prune", pruneCommand, func(cmd *cobra.Command) { 485 cmd.Flags().BoolVarP(&pruneDryRunArg, "dry-run", "d", false, "Don't delete anything, just report") 486 cmd.Flags().BoolVarP(&pruneVerboseArg, "verbose", "v", false, "Print full details of what is/would be deleted") 487 cmd.Flags().BoolVarP(&pruneVerifyArg, "verify-remote", "c", false, "Verify that remote has LFS files before deleting") 488 cmd.Flags().BoolVar(&pruneDoNotVerifyArg, "no-verify-remote", false, "Override lfs.pruneverifyremotealways and don't verify") 489 }) 490 }