github.com/advanderveer/restic@v0.8.1-0.20171209104529-42a8c19aaea6/cmd/restic/cmd_prune.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "time" 6 7 "github.com/restic/restic/internal/debug" 8 "github.com/restic/restic/internal/errors" 9 "github.com/restic/restic/internal/index" 10 "github.com/restic/restic/internal/repository" 11 "github.com/restic/restic/internal/restic" 12 13 "github.com/spf13/cobra" 14 ) 15 16 var cmdPrune = &cobra.Command{ 17 Use: "prune [flags]", 18 Short: "Remove unneeded data from the repository", 19 Long: ` 20 The "prune" command checks the repository and removes data that is not 21 referenced and therefore not needed any more. 22 `, 23 DisableAutoGenTag: true, 24 RunE: func(cmd *cobra.Command, args []string) error { 25 return runPrune(globalOptions) 26 }, 27 } 28 29 func init() { 30 cmdRoot.AddCommand(cmdPrune) 31 } 32 33 func shortenStatus(maxLength int, s string) string { 34 if len(s) <= maxLength { 35 return s 36 } 37 38 if maxLength < 3 { 39 return s[:maxLength] 40 } 41 42 return s[:maxLength-3] + "..." 43 } 44 45 // newProgressMax returns a progress that counts blobs. 46 func newProgressMax(show bool, max uint64, description string) *restic.Progress { 47 if !show { 48 return nil 49 } 50 51 p := restic.NewProgress() 52 53 p.OnUpdate = func(s restic.Stat, d time.Duration, ticker bool) { 54 status := fmt.Sprintf("[%s] %s %d / %d %s", 55 formatDuration(d), 56 formatPercent(s.Blobs, max), 57 s.Blobs, max, description) 58 59 if w := stdoutTerminalWidth(); w > 0 { 60 status = shortenStatus(w, status) 61 } 62 63 PrintProgress("%s", status) 64 } 65 66 p.OnDone = func(s restic.Stat, d time.Duration, ticker bool) { 67 fmt.Printf("\n") 68 } 69 70 return p 71 } 72 73 func runPrune(gopts GlobalOptions) error { 74 repo, err := OpenRepository(gopts) 75 if err != nil { 76 return err 77 } 78 79 lock, err := lockRepoExclusive(repo) 80 defer unlockRepo(lock) 81 if err != nil { 82 return err 83 } 84 85 return pruneRepository(gopts, repo) 86 } 87 88 func mixedBlobs(list []restic.Blob) bool { 89 var tree, data bool 90 91 for _, pb := range list { 92 switch pb.Type { 93 case restic.TreeBlob: 94 tree = true 95 case restic.DataBlob: 96 data = true 97 } 98 99 if tree && data { 100 return true 101 } 102 } 103 104 return false 105 } 106 107 func pruneRepository(gopts GlobalOptions, repo restic.Repository) error { 108 ctx := gopts.ctx 109 110 err := repo.LoadIndex(ctx) 111 if err != nil { 112 return err 113 } 114 115 var stats struct { 116 blobs int 117 packs int 118 snapshots int 119 bytes int64 120 } 121 122 Verbosef("counting files in repo\n") 123 for range repo.List(ctx, restic.DataFile) { 124 stats.packs++ 125 } 126 127 Verbosef("building new index for repo\n") 128 129 bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs") 130 idx, invalidFiles, err := index.New(ctx, repo, restic.NewIDSet(), bar) 131 if err != nil { 132 return err 133 } 134 135 for _, id := range invalidFiles { 136 Warnf("incomplete pack file (will be removed): %v\n", id) 137 } 138 139 blobs := 0 140 for _, pack := range idx.Packs { 141 stats.bytes += pack.Size 142 blobs += len(pack.Entries) 143 } 144 Verbosef("repository contains %v packs (%v blobs) with %v\n", 145 len(idx.Packs), blobs, formatBytes(uint64(stats.bytes))) 146 147 blobCount := make(map[restic.BlobHandle]int) 148 duplicateBlobs := 0 149 duplicateBytes := 0 150 151 // find duplicate blobs 152 for _, p := range idx.Packs { 153 for _, entry := range p.Entries { 154 stats.blobs++ 155 h := restic.BlobHandle{ID: entry.ID, Type: entry.Type} 156 blobCount[h]++ 157 158 if blobCount[h] > 1 { 159 duplicateBlobs++ 160 duplicateBytes += int(entry.Length) 161 } 162 } 163 } 164 165 Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n", 166 stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes))) 167 Verbosef("load all snapshots\n") 168 169 // find referenced blobs 170 snapshots, err := restic.LoadAllSnapshots(ctx, repo) 171 if err != nil { 172 return err 173 } 174 175 stats.snapshots = len(snapshots) 176 177 Verbosef("find data that is still in use for %d snapshots\n", stats.snapshots) 178 179 usedBlobs := restic.NewBlobSet() 180 seenBlobs := restic.NewBlobSet() 181 182 bar = newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots") 183 bar.Start() 184 for _, sn := range snapshots { 185 debug.Log("process snapshot %v", sn.ID().Str()) 186 187 err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs, seenBlobs) 188 if err != nil { 189 if repo.Backend().IsNotExist(err) { 190 return errors.Fatal("unable to load a tree from the repo: " + err.Error()) 191 } 192 193 return err 194 } 195 196 debug.Log("processed snapshot %v", sn.ID().Str()) 197 bar.Report(restic.Stat{Blobs: 1}) 198 } 199 bar.Done() 200 201 if len(usedBlobs) > stats.blobs { 202 return errors.Fatalf("number of used blobs is larger than number of available blobs!\n" + 203 "Please report this error (along with the output of the 'prune' run) at\n" + 204 "https://github.com/restic/restic/issues/new") 205 } 206 207 Verbosef("found %d of %d data blobs still in use, removing %d blobs\n", 208 len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs)) 209 210 // find packs that need a rewrite 211 rewritePacks := restic.NewIDSet() 212 for _, pack := range idx.Packs { 213 if mixedBlobs(pack.Entries) { 214 rewritePacks.Insert(pack.ID) 215 continue 216 } 217 218 for _, blob := range pack.Entries { 219 h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} 220 if !usedBlobs.Has(h) { 221 rewritePacks.Insert(pack.ID) 222 continue 223 } 224 225 if blobCount[h] > 1 { 226 rewritePacks.Insert(pack.ID) 227 } 228 } 229 } 230 231 removeBytes := duplicateBytes 232 233 // find packs that are unneeded 234 removePacks := restic.NewIDSet() 235 236 Verbosef("will remove %d invalid files\n", len(invalidFiles)) 237 for _, id := range invalidFiles { 238 removePacks.Insert(id) 239 } 240 241 for packID, p := range idx.Packs { 242 243 hasActiveBlob := false 244 for _, blob := range p.Entries { 245 h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} 246 if usedBlobs.Has(h) { 247 hasActiveBlob = true 248 continue 249 } 250 251 removeBytes += int(blob.Length) 252 } 253 254 if hasActiveBlob { 255 continue 256 } 257 258 removePacks.Insert(packID) 259 260 if !rewritePacks.Has(packID) { 261 return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str()) 262 } 263 264 rewritePacks.Delete(packID) 265 } 266 267 Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n", 268 len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes))) 269 270 var obsoletePacks restic.IDSet 271 if len(rewritePacks) != 0 { 272 bar = newProgressMax(!gopts.Quiet, uint64(len(rewritePacks)), "packs rewritten") 273 bar.Start() 274 obsoletePacks, err = repository.Repack(ctx, repo, rewritePacks, usedBlobs, bar) 275 if err != nil { 276 return err 277 } 278 bar.Done() 279 } 280 281 removePacks.Merge(obsoletePacks) 282 283 if err = rebuildIndex(ctx, repo, removePacks); err != nil { 284 return err 285 } 286 287 if len(removePacks) != 0 { 288 bar = newProgressMax(!gopts.Quiet, uint64(len(removePacks)), "packs deleted") 289 bar.Start() 290 for packID := range removePacks { 291 h := restic.Handle{Type: restic.DataFile, Name: packID.String()} 292 err = repo.Backend().Remove(ctx, h) 293 if err != nil { 294 Warnf("unable to remove file %v from the repository\n", packID.Str()) 295 } 296 bar.Report(restic.Stat{Blobs: 1}) 297 } 298 bar.Done() 299 } 300 301 Verbosef("done\n") 302 return nil 303 }