github.com/mckael/restic@v0.8.3/cmd/restic/cmd_prune.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "time" 6 7 "github.com/restic/restic/internal/debug" 8 "github.com/restic/restic/internal/errors" 9 "github.com/restic/restic/internal/index" 10 "github.com/restic/restic/internal/repository" 11 "github.com/restic/restic/internal/restic" 12 13 "github.com/spf13/cobra" 14 ) 15 16 var cmdPrune = &cobra.Command{ 17 Use: "prune [flags]", 18 Short: "Remove unneeded data from the repository", 19 Long: ` 20 The "prune" command checks the repository and removes data that is not 21 referenced and therefore not needed any more. 22 `, 23 DisableAutoGenTag: true, 24 RunE: func(cmd *cobra.Command, args []string) error { 25 return runPrune(globalOptions) 26 }, 27 } 28 29 func init() { 30 cmdRoot.AddCommand(cmdPrune) 31 } 32 33 func shortenStatus(maxLength int, s string) string { 34 if len(s) <= maxLength { 35 return s 36 } 37 38 if maxLength < 3 { 39 return s[:maxLength] 40 } 41 42 return s[:maxLength-3] + "..." 43 } 44 45 // newProgressMax returns a progress that counts blobs. 46 func newProgressMax(show bool, max uint64, description string) *restic.Progress { 47 if !show { 48 return nil 49 } 50 51 p := restic.NewProgress() 52 53 p.OnUpdate = func(s restic.Stat, d time.Duration, ticker bool) { 54 status := fmt.Sprintf("[%s] %s %d / %d %s", 55 formatDuration(d), 56 formatPercent(s.Blobs, max), 57 s.Blobs, max, description) 58 59 if w := stdoutTerminalWidth(); w > 0 { 60 status = shortenStatus(w, status) 61 } 62 63 PrintProgress("%s", status) 64 } 65 66 p.OnDone = func(s restic.Stat, d time.Duration, ticker bool) { 67 fmt.Printf("\n") 68 } 69 70 return p 71 } 72 73 func runPrune(gopts GlobalOptions) error { 74 repo, err := OpenRepository(gopts) 75 if err != nil { 76 return err 77 } 78 79 lock, err := lockRepoExclusive(repo) 80 defer unlockRepo(lock) 81 if err != nil { 82 return err 83 } 84 85 return pruneRepository(gopts, repo) 86 } 87 88 func mixedBlobs(list []restic.Blob) bool { 89 var tree, data bool 90 91 for _, pb := range list { 92 switch pb.Type { 93 case restic.TreeBlob: 94 tree = true 95 case restic.DataBlob: 96 data = true 97 } 98 99 if tree && data { 100 return true 101 } 102 } 103 104 return false 105 } 106 107 func pruneRepository(gopts GlobalOptions, repo restic.Repository) error { 108 ctx := gopts.ctx 109 110 err := repo.LoadIndex(ctx) 111 if err != nil { 112 return err 113 } 114 115 var stats struct { 116 blobs int 117 packs int 118 snapshots int 119 bytes int64 120 } 121 122 Verbosef("counting files in repo\n") 123 err = repo.List(ctx, restic.DataFile, func(restic.ID, int64) error { 124 stats.packs++ 125 return nil 126 }) 127 if err != nil { 128 return err 129 } 130 131 Verbosef("building new index for repo\n") 132 133 bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs") 134 idx, invalidFiles, err := index.New(ctx, repo, restic.NewIDSet(), bar) 135 if err != nil { 136 return err 137 } 138 139 for _, id := range invalidFiles { 140 Warnf("incomplete pack file (will be removed): %v\n", id) 141 } 142 143 blobs := 0 144 for _, pack := range idx.Packs { 145 stats.bytes += pack.Size 146 blobs += len(pack.Entries) 147 } 148 Verbosef("repository contains %v packs (%v blobs) with %v\n", 149 len(idx.Packs), blobs, formatBytes(uint64(stats.bytes))) 150 151 blobCount := make(map[restic.BlobHandle]int) 152 duplicateBlobs := 0 153 duplicateBytes := 0 154 155 // find duplicate blobs 156 for _, p := range idx.Packs { 157 for _, entry := range p.Entries { 158 stats.blobs++ 159 h := restic.BlobHandle{ID: entry.ID, Type: entry.Type} 160 blobCount[h]++ 161 162 if blobCount[h] > 1 { 163 duplicateBlobs++ 164 duplicateBytes += int(entry.Length) 165 } 166 } 167 } 168 169 Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n", 170 stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes))) 171 Verbosef("load all snapshots\n") 172 173 // find referenced blobs 174 snapshots, err := restic.LoadAllSnapshots(ctx, repo) 175 if err != nil { 176 return err 177 } 178 179 stats.snapshots = len(snapshots) 180 181 Verbosef("find data that is still in use for %d snapshots\n", stats.snapshots) 182 183 usedBlobs := restic.NewBlobSet() 184 seenBlobs := restic.NewBlobSet() 185 186 bar = newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots") 187 bar.Start() 188 for _, sn := range snapshots { 189 debug.Log("process snapshot %v", sn.ID()) 190 191 err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs, seenBlobs) 192 if err != nil { 193 if repo.Backend().IsNotExist(err) { 194 return errors.Fatal("unable to load a tree from the repo: " + err.Error()) 195 } 196 197 return err 198 } 199 200 debug.Log("processed snapshot %v", sn.ID()) 201 bar.Report(restic.Stat{Blobs: 1}) 202 } 203 bar.Done() 204 205 if len(usedBlobs) > stats.blobs { 206 return errors.Fatalf("number of used blobs is larger than number of available blobs!\n" + 207 "Please report this error (along with the output of the 'prune' run) at\n" + 208 "https://github.com/restic/restic/issues/new") 209 } 210 211 Verbosef("found %d of %d data blobs still in use, removing %d blobs\n", 212 len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs)) 213 214 // find packs that need a rewrite 215 rewritePacks := restic.NewIDSet() 216 for _, pack := range idx.Packs { 217 if mixedBlobs(pack.Entries) { 218 rewritePacks.Insert(pack.ID) 219 continue 220 } 221 222 for _, blob := range pack.Entries { 223 h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} 224 if !usedBlobs.Has(h) { 225 rewritePacks.Insert(pack.ID) 226 continue 227 } 228 229 if blobCount[h] > 1 { 230 rewritePacks.Insert(pack.ID) 231 } 232 } 233 } 234 235 removeBytes := duplicateBytes 236 237 // find packs that are unneeded 238 removePacks := restic.NewIDSet() 239 240 Verbosef("will remove %d invalid files\n", len(invalidFiles)) 241 for _, id := range invalidFiles { 242 removePacks.Insert(id) 243 } 244 245 for packID, p := range idx.Packs { 246 247 hasActiveBlob := false 248 for _, blob := range p.Entries { 249 h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} 250 if usedBlobs.Has(h) { 251 hasActiveBlob = true 252 continue 253 } 254 255 removeBytes += int(blob.Length) 256 } 257 258 if hasActiveBlob { 259 continue 260 } 261 262 removePacks.Insert(packID) 263 264 if !rewritePacks.Has(packID) { 265 return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str()) 266 } 267 268 rewritePacks.Delete(packID) 269 } 270 271 Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n", 272 len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes))) 273 274 var obsoletePacks restic.IDSet 275 if len(rewritePacks) != 0 { 276 bar = newProgressMax(!gopts.Quiet, uint64(len(rewritePacks)), "packs rewritten") 277 bar.Start() 278 obsoletePacks, err = repository.Repack(ctx, repo, rewritePacks, usedBlobs, bar) 279 if err != nil { 280 return err 281 } 282 bar.Done() 283 } 284 285 removePacks.Merge(obsoletePacks) 286 287 if err = rebuildIndex(ctx, repo, removePacks); err != nil { 288 return err 289 } 290 291 if len(removePacks) != 0 { 292 bar = newProgressMax(!gopts.Quiet, uint64(len(removePacks)), "packs deleted") 293 bar.Start() 294 for packID := range removePacks { 295 h := restic.Handle{Type: restic.DataFile, Name: packID.String()} 296 err = repo.Backend().Remove(ctx, h) 297 if err != nil { 298 Warnf("unable to remove file %v from the repository\n", packID.Str()) 299 } 300 bar.Report(restic.Stat{Blobs: 1}) 301 } 302 bar.Done() 303 } 304 305 Verbosef("done\n") 306 return nil 307 }