github.com/mckael/restic@v0.8.3/cmd/restic/cmd_prune.go (about)

     1  package main
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	"github.com/restic/restic/internal/debug"
     8  	"github.com/restic/restic/internal/errors"
     9  	"github.com/restic/restic/internal/index"
    10  	"github.com/restic/restic/internal/repository"
    11  	"github.com/restic/restic/internal/restic"
    12  
    13  	"github.com/spf13/cobra"
    14  )
    15  
    16  var cmdPrune = &cobra.Command{
    17  	Use:   "prune [flags]",
    18  	Short: "Remove unneeded data from the repository",
    19  	Long: `
    20  The "prune" command checks the repository and removes data that is not
    21  referenced and therefore not needed any more.
    22  `,
    23  	DisableAutoGenTag: true,
    24  	RunE: func(cmd *cobra.Command, args []string) error {
    25  		return runPrune(globalOptions)
    26  	},
    27  }
    28  
    29  func init() {
    30  	cmdRoot.AddCommand(cmdPrune)
    31  }
    32  
    33  func shortenStatus(maxLength int, s string) string {
    34  	if len(s) <= maxLength {
    35  		return s
    36  	}
    37  
    38  	if maxLength < 3 {
    39  		return s[:maxLength]
    40  	}
    41  
    42  	return s[:maxLength-3] + "..."
    43  }
    44  
    45  // newProgressMax returns a progress that counts blobs.
    46  func newProgressMax(show bool, max uint64, description string) *restic.Progress {
    47  	if !show {
    48  		return nil
    49  	}
    50  
    51  	p := restic.NewProgress()
    52  
    53  	p.OnUpdate = func(s restic.Stat, d time.Duration, ticker bool) {
    54  		status := fmt.Sprintf("[%s] %s  %d / %d %s",
    55  			formatDuration(d),
    56  			formatPercent(s.Blobs, max),
    57  			s.Blobs, max, description)
    58  
    59  		if w := stdoutTerminalWidth(); w > 0 {
    60  			status = shortenStatus(w, status)
    61  		}
    62  
    63  		PrintProgress("%s", status)
    64  	}
    65  
    66  	p.OnDone = func(s restic.Stat, d time.Duration, ticker bool) {
    67  		fmt.Printf("\n")
    68  	}
    69  
    70  	return p
    71  }
    72  
    73  func runPrune(gopts GlobalOptions) error {
    74  	repo, err := OpenRepository(gopts)
    75  	if err != nil {
    76  		return err
    77  	}
    78  
    79  	lock, err := lockRepoExclusive(repo)
    80  	defer unlockRepo(lock)
    81  	if err != nil {
    82  		return err
    83  	}
    84  
    85  	return pruneRepository(gopts, repo)
    86  }
    87  
    88  func mixedBlobs(list []restic.Blob) bool {
    89  	var tree, data bool
    90  
    91  	for _, pb := range list {
    92  		switch pb.Type {
    93  		case restic.TreeBlob:
    94  			tree = true
    95  		case restic.DataBlob:
    96  			data = true
    97  		}
    98  
    99  		if tree && data {
   100  			return true
   101  		}
   102  	}
   103  
   104  	return false
   105  }
   106  
   107  func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
   108  	ctx := gopts.ctx
   109  
   110  	err := repo.LoadIndex(ctx)
   111  	if err != nil {
   112  		return err
   113  	}
   114  
   115  	var stats struct {
   116  		blobs     int
   117  		packs     int
   118  		snapshots int
   119  		bytes     int64
   120  	}
   121  
   122  	Verbosef("counting files in repo\n")
   123  	err = repo.List(ctx, restic.DataFile, func(restic.ID, int64) error {
   124  		stats.packs++
   125  		return nil
   126  	})
   127  	if err != nil {
   128  		return err
   129  	}
   130  
   131  	Verbosef("building new index for repo\n")
   132  
   133  	bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
   134  	idx, invalidFiles, err := index.New(ctx, repo, restic.NewIDSet(), bar)
   135  	if err != nil {
   136  		return err
   137  	}
   138  
   139  	for _, id := range invalidFiles {
   140  		Warnf("incomplete pack file (will be removed): %v\n", id)
   141  	}
   142  
   143  	blobs := 0
   144  	for _, pack := range idx.Packs {
   145  		stats.bytes += pack.Size
   146  		blobs += len(pack.Entries)
   147  	}
   148  	Verbosef("repository contains %v packs (%v blobs) with %v\n",
   149  		len(idx.Packs), blobs, formatBytes(uint64(stats.bytes)))
   150  
   151  	blobCount := make(map[restic.BlobHandle]int)
   152  	duplicateBlobs := 0
   153  	duplicateBytes := 0
   154  
   155  	// find duplicate blobs
   156  	for _, p := range idx.Packs {
   157  		for _, entry := range p.Entries {
   158  			stats.blobs++
   159  			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
   160  			blobCount[h]++
   161  
   162  			if blobCount[h] > 1 {
   163  				duplicateBlobs++
   164  				duplicateBytes += int(entry.Length)
   165  			}
   166  		}
   167  	}
   168  
   169  	Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n",
   170  		stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes)))
   171  	Verbosef("load all snapshots\n")
   172  
   173  	// find referenced blobs
   174  	snapshots, err := restic.LoadAllSnapshots(ctx, repo)
   175  	if err != nil {
   176  		return err
   177  	}
   178  
   179  	stats.snapshots = len(snapshots)
   180  
   181  	Verbosef("find data that is still in use for %d snapshots\n", stats.snapshots)
   182  
   183  	usedBlobs := restic.NewBlobSet()
   184  	seenBlobs := restic.NewBlobSet()
   185  
   186  	bar = newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots")
   187  	bar.Start()
   188  	for _, sn := range snapshots {
   189  		debug.Log("process snapshot %v", sn.ID())
   190  
   191  		err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs, seenBlobs)
   192  		if err != nil {
   193  			if repo.Backend().IsNotExist(err) {
   194  				return errors.Fatal("unable to load a tree from the repo: " + err.Error())
   195  			}
   196  
   197  			return err
   198  		}
   199  
   200  		debug.Log("processed snapshot %v", sn.ID())
   201  		bar.Report(restic.Stat{Blobs: 1})
   202  	}
   203  	bar.Done()
   204  
   205  	if len(usedBlobs) > stats.blobs {
   206  		return errors.Fatalf("number of used blobs is larger than number of available blobs!\n" +
   207  			"Please report this error (along with the output of the 'prune' run) at\n" +
   208  			"https://github.com/restic/restic/issues/new")
   209  	}
   210  
   211  	Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
   212  		len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs))
   213  
   214  	// find packs that need a rewrite
   215  	rewritePacks := restic.NewIDSet()
   216  	for _, pack := range idx.Packs {
   217  		if mixedBlobs(pack.Entries) {
   218  			rewritePacks.Insert(pack.ID)
   219  			continue
   220  		}
   221  
   222  		for _, blob := range pack.Entries {
   223  			h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
   224  			if !usedBlobs.Has(h) {
   225  				rewritePacks.Insert(pack.ID)
   226  				continue
   227  			}
   228  
   229  			if blobCount[h] > 1 {
   230  				rewritePacks.Insert(pack.ID)
   231  			}
   232  		}
   233  	}
   234  
   235  	removeBytes := duplicateBytes
   236  
   237  	// find packs that are unneeded
   238  	removePacks := restic.NewIDSet()
   239  
   240  	Verbosef("will remove %d invalid files\n", len(invalidFiles))
   241  	for _, id := range invalidFiles {
   242  		removePacks.Insert(id)
   243  	}
   244  
   245  	for packID, p := range idx.Packs {
   246  
   247  		hasActiveBlob := false
   248  		for _, blob := range p.Entries {
   249  			h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
   250  			if usedBlobs.Has(h) {
   251  				hasActiveBlob = true
   252  				continue
   253  			}
   254  
   255  			removeBytes += int(blob.Length)
   256  		}
   257  
   258  		if hasActiveBlob {
   259  			continue
   260  		}
   261  
   262  		removePacks.Insert(packID)
   263  
   264  		if !rewritePacks.Has(packID) {
   265  			return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
   266  		}
   267  
   268  		rewritePacks.Delete(packID)
   269  	}
   270  
   271  	Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n",
   272  		len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes)))
   273  
   274  	var obsoletePacks restic.IDSet
   275  	if len(rewritePacks) != 0 {
   276  		bar = newProgressMax(!gopts.Quiet, uint64(len(rewritePacks)), "packs rewritten")
   277  		bar.Start()
   278  		obsoletePacks, err = repository.Repack(ctx, repo, rewritePacks, usedBlobs, bar)
   279  		if err != nil {
   280  			return err
   281  		}
   282  		bar.Done()
   283  	}
   284  
   285  	removePacks.Merge(obsoletePacks)
   286  
   287  	if err = rebuildIndex(ctx, repo, removePacks); err != nil {
   288  		return err
   289  	}
   290  
   291  	if len(removePacks) != 0 {
   292  		bar = newProgressMax(!gopts.Quiet, uint64(len(removePacks)), "packs deleted")
   293  		bar.Start()
   294  		for packID := range removePacks {
   295  			h := restic.Handle{Type: restic.DataFile, Name: packID.String()}
   296  			err = repo.Backend().Remove(ctx, h)
   297  			if err != nil {
   298  				Warnf("unable to remove file %v from the repository\n", packID.Str())
   299  			}
   300  			bar.Report(restic.Stat{Blobs: 1})
   301  		}
   302  		bar.Done()
   303  	}
   304  
   305  	Verbosef("done\n")
   306  	return nil
   307  }