github.com/fawick/restic@v0.1.1-0.20171126184616-c02923fbfc79/internal/repository/repack_test.go (about)

     1  package repository_test
     2  
     3  import (
     4  	"context"
     5  	"io"
     6  	"math/rand"
     7  	"testing"
     8  
     9  	"github.com/restic/restic/internal/index"
    10  	"github.com/restic/restic/internal/repository"
    11  	"github.com/restic/restic/internal/restic"
    12  )
    13  
    14  func randomSize(min, max int) int {
    15  	return rand.Intn(max-min) + min
    16  }
    17  
    18  func random(t testing.TB, length int) []byte {
    19  	rd := restic.NewRandReader(rand.New(rand.NewSource(int64(length))))
    20  	buf := make([]byte, length)
    21  	_, err := io.ReadFull(rd, buf)
    22  	if err != nil {
    23  		t.Fatalf("unable to read %d random bytes: %v", length, err)
    24  	}
    25  
    26  	return buf
    27  }
    28  
    29  func createRandomBlobs(t testing.TB, repo restic.Repository, blobs int, pData float32) {
    30  	for i := 0; i < blobs; i++ {
    31  		var (
    32  			tpe    restic.BlobType
    33  			length int
    34  		)
    35  
    36  		if rand.Float32() < pData {
    37  			tpe = restic.DataBlob
    38  			length = randomSize(10*1024, 1024*1024) // 10KiB to 1MiB of data
    39  		} else {
    40  			tpe = restic.TreeBlob
    41  			length = randomSize(1*1024, 20*1024) // 1KiB to 20KiB
    42  		}
    43  
    44  		buf := random(t, length)
    45  		id := restic.Hash(buf)
    46  
    47  		if repo.Index().Has(id, restic.DataBlob) {
    48  			t.Errorf("duplicate blob %v/%v ignored", id, restic.DataBlob)
    49  			continue
    50  		}
    51  
    52  		_, err := repo.SaveBlob(context.TODO(), tpe, buf, id)
    53  		if err != nil {
    54  			t.Fatalf("SaveFrom() error %v", err)
    55  		}
    56  
    57  		if rand.Float32() < 0.2 {
    58  			if err = repo.Flush(); err != nil {
    59  				t.Fatalf("repo.Flush() returned error %v", err)
    60  			}
    61  		}
    62  	}
    63  
    64  	if err := repo.Flush(); err != nil {
    65  		t.Fatalf("repo.Flush() returned error %v", err)
    66  	}
    67  }
    68  
    69  // selectBlobs splits the list of all blobs randomly into two lists. A blob
    70  // will be contained in the firstone ith probability p.
    71  func selectBlobs(t *testing.T, repo restic.Repository, p float32) (list1, list2 restic.BlobSet) {
    72  	list1 = restic.NewBlobSet()
    73  	list2 = restic.NewBlobSet()
    74  
    75  	blobs := restic.NewBlobSet()
    76  
    77  	for id := range repo.List(context.TODO(), restic.DataFile) {
    78  		entries, _, err := repo.ListPack(context.TODO(), id)
    79  		if err != nil {
    80  			t.Fatalf("error listing pack %v: %v", id, err)
    81  		}
    82  
    83  		for _, entry := range entries {
    84  			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
    85  			if blobs.Has(h) {
    86  				t.Errorf("ignoring duplicate blob %v", h)
    87  				continue
    88  			}
    89  			blobs.Insert(h)
    90  
    91  			if rand.Float32() <= p {
    92  				list1.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type})
    93  			} else {
    94  				list2.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type})
    95  			}
    96  
    97  		}
    98  	}
    99  
   100  	return list1, list2
   101  }
   102  
   103  func listPacks(t *testing.T, repo restic.Repository) restic.IDSet {
   104  	list := restic.NewIDSet()
   105  	for id := range repo.List(context.TODO(), restic.DataFile) {
   106  		list.Insert(id)
   107  	}
   108  
   109  	return list
   110  }
   111  
   112  func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSet) restic.IDSet {
   113  	packs := restic.NewIDSet()
   114  
   115  	idx := repo.Index()
   116  	for h := range blobs {
   117  		list, err := idx.Lookup(h.ID, h.Type)
   118  		if err != nil {
   119  			t.Fatal(err)
   120  		}
   121  
   122  		for _, pb := range list {
   123  			packs.Insert(pb.PackID)
   124  		}
   125  	}
   126  
   127  	return packs
   128  }
   129  
   130  func repack(t *testing.T, repo restic.Repository, packs restic.IDSet, blobs restic.BlobSet) {
   131  	repackedBlobs, err := repository.Repack(context.TODO(), repo, packs, blobs, nil)
   132  	if err != nil {
   133  		t.Fatal(err)
   134  	}
   135  
   136  	for id := range repackedBlobs {
   137  		err = repo.Backend().Remove(context.TODO(), restic.Handle{Type: restic.DataFile, Name: id.String()})
   138  		if err != nil {
   139  			t.Fatal(err)
   140  		}
   141  	}
   142  }
   143  
   144  func saveIndex(t *testing.T, repo restic.Repository) {
   145  	if err := repo.SaveIndex(context.TODO()); err != nil {
   146  		t.Fatalf("repo.SaveIndex() %v", err)
   147  	}
   148  }
   149  
   150  func rebuildIndex(t *testing.T, repo restic.Repository) {
   151  	idx, _, err := index.New(context.TODO(), repo, restic.NewIDSet(), nil)
   152  	if err != nil {
   153  		t.Fatal(err)
   154  	}
   155  
   156  	for id := range repo.List(context.TODO(), restic.IndexFile) {
   157  		h := restic.Handle{
   158  			Type: restic.IndexFile,
   159  			Name: id.String(),
   160  		}
   161  		err = repo.Backend().Remove(context.TODO(), h)
   162  		if err != nil {
   163  			t.Fatal(err)
   164  		}
   165  	}
   166  
   167  	_, err = idx.Save(context.TODO(), repo, nil)
   168  	if err != nil {
   169  		t.Fatal(err)
   170  	}
   171  }
   172  
   173  func reloadIndex(t *testing.T, repo restic.Repository) {
   174  	repo.SetIndex(repository.NewMasterIndex())
   175  	if err := repo.LoadIndex(context.TODO()); err != nil {
   176  		t.Fatalf("error loading new index: %v", err)
   177  	}
   178  }
   179  
   180  func TestRepack(t *testing.T) {
   181  	repo, cleanup := repository.TestRepository(t)
   182  	defer cleanup()
   183  
   184  	createRandomBlobs(t, repo, 100, 0.7)
   185  
   186  	packsBefore := listPacks(t, repo)
   187  
   188  	// Running repack on empty ID sets should not do anything at all.
   189  	repack(t, repo, nil, nil)
   190  
   191  	packsAfter := listPacks(t, repo)
   192  
   193  	if !packsAfter.Equals(packsBefore) {
   194  		t.Fatalf("packs are not equal, Repack modified something. Before:\n  %v\nAfter:\n  %v",
   195  			packsBefore, packsAfter)
   196  	}
   197  
   198  	saveIndex(t, repo)
   199  
   200  	removeBlobs, keepBlobs := selectBlobs(t, repo, 0.2)
   201  
   202  	removePacks := findPacksForBlobs(t, repo, removeBlobs)
   203  
   204  	repack(t, repo, removePacks, keepBlobs)
   205  	rebuildIndex(t, repo)
   206  	reloadIndex(t, repo)
   207  
   208  	packsAfter = listPacks(t, repo)
   209  	for id := range removePacks {
   210  		if packsAfter.Has(id) {
   211  			t.Errorf("pack %v still present although it should have been repacked and removed", id.Str())
   212  		}
   213  	}
   214  
   215  	idx := repo.Index()
   216  
   217  	for h := range keepBlobs {
   218  		list, err := idx.Lookup(h.ID, h.Type)
   219  		if err != nil {
   220  			t.Errorf("unable to find blob %v in repo", h.ID.Str())
   221  			continue
   222  		}
   223  
   224  		if len(list) != 1 {
   225  			t.Errorf("expected one pack in the list, got: %v", list)
   226  			continue
   227  		}
   228  
   229  		pb := list[0]
   230  
   231  		if removePacks.Has(pb.PackID) {
   232  			t.Errorf("lookup returned pack ID %v that should've been removed", pb.PackID)
   233  		}
   234  	}
   235  
   236  	for h := range removeBlobs {
   237  		if _, err := idx.Lookup(h.ID, h.Type); err == nil {
   238  			t.Errorf("blob %v still contained in the repo", h)
   239  		}
   240  	}
   241  }