github.com/mckael/restic@v0.8.3/internal/repository/repack_test.go (about)

     1  package repository_test
     2  
import (
	"context"
	"io"
	"math/rand"
	"testing"
	"time"

	"github.com/restic/restic/internal/index"
	"github.com/restic/restic/internal/repository"
	"github.com/restic/restic/internal/restic"
)
    13  
    14  func randomSize(min, max int) int {
    15  	return rand.Intn(max-min) + min
    16  }
    17  
    18  func random(t testing.TB, length int) []byte {
    19  	rd := restic.NewRandReader(rand.New(rand.NewSource(rand.Int63())))
    20  	buf := make([]byte, length)
    21  	_, err := io.ReadFull(rd, buf)
    22  	if err != nil {
    23  		t.Fatalf("unable to read %d random bytes: %v", length, err)
    24  	}
    25  
    26  	return buf
    27  }
    28  
    29  func createRandomBlobs(t testing.TB, repo restic.Repository, blobs int, pData float32) {
    30  	for i := 0; i < blobs; i++ {
    31  		var (
    32  			tpe    restic.BlobType
    33  			length int
    34  		)
    35  
    36  		if rand.Float32() < pData {
    37  			tpe = restic.DataBlob
    38  			length = randomSize(10*1024, 1024*1024) // 10KiB to 1MiB of data
    39  		} else {
    40  			tpe = restic.TreeBlob
    41  			length = randomSize(1*1024, 20*1024) // 1KiB to 20KiB
    42  		}
    43  
    44  		buf := random(t, length)
    45  		id := restic.Hash(buf)
    46  
    47  		if repo.Index().Has(id, restic.DataBlob) {
    48  			t.Errorf("duplicate blob %v/%v ignored", id, restic.DataBlob)
    49  			continue
    50  		}
    51  
    52  		_, err := repo.SaveBlob(context.TODO(), tpe, buf, id)
    53  		if err != nil {
    54  			t.Fatalf("SaveFrom() error %v", err)
    55  		}
    56  
    57  		if rand.Float32() < 0.2 {
    58  			if err = repo.Flush(context.Background()); err != nil {
    59  				t.Fatalf("repo.Flush() returned error %v", err)
    60  			}
    61  		}
    62  	}
    63  
    64  	if err := repo.Flush(context.Background()); err != nil {
    65  		t.Fatalf("repo.Flush() returned error %v", err)
    66  	}
    67  }
    68  
    69  // selectBlobs splits the list of all blobs randomly into two lists. A blob
    70  // will be contained in the firstone ith probability p.
    71  func selectBlobs(t *testing.T, repo restic.Repository, p float32) (list1, list2 restic.BlobSet) {
    72  	list1 = restic.NewBlobSet()
    73  	list2 = restic.NewBlobSet()
    74  
    75  	blobs := restic.NewBlobSet()
    76  
    77  	err := repo.List(context.TODO(), restic.DataFile, func(id restic.ID, size int64) error {
    78  		entries, _, err := repo.ListPack(context.TODO(), id, size)
    79  		if err != nil {
    80  			t.Fatalf("error listing pack %v: %v", id, err)
    81  		}
    82  
    83  		for _, entry := range entries {
    84  			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
    85  			if blobs.Has(h) {
    86  				t.Errorf("ignoring duplicate blob %v", h)
    87  				return nil
    88  			}
    89  			blobs.Insert(h)
    90  
    91  			if rand.Float32() <= p {
    92  				list1.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type})
    93  			} else {
    94  				list2.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type})
    95  			}
    96  		}
    97  		return nil
    98  	})
    99  	if err != nil {
   100  		t.Fatal(err)
   101  	}
   102  
   103  	return list1, list2
   104  }
   105  
   106  func listPacks(t *testing.T, repo restic.Repository) restic.IDSet {
   107  	list := restic.NewIDSet()
   108  	err := repo.List(context.TODO(), restic.DataFile, func(id restic.ID, size int64) error {
   109  		list.Insert(id)
   110  		return nil
   111  	})
   112  
   113  	if err != nil {
   114  		t.Fatal(err)
   115  	}
   116  
   117  	return list
   118  }
   119  
   120  func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSet) restic.IDSet {
   121  	packs := restic.NewIDSet()
   122  
   123  	idx := repo.Index()
   124  	for h := range blobs {
   125  		list, found := idx.Lookup(h.ID, h.Type)
   126  		if !found {
   127  			t.Fatal("Failed to find blob", h.ID.Str(), "with type", h.Type)
   128  		}
   129  
   130  		for _, pb := range list {
   131  			packs.Insert(pb.PackID)
   132  		}
   133  	}
   134  
   135  	return packs
   136  }
   137  
   138  func repack(t *testing.T, repo restic.Repository, packs restic.IDSet, blobs restic.BlobSet) {
   139  	repackedBlobs, err := repository.Repack(context.TODO(), repo, packs, blobs, nil)
   140  	if err != nil {
   141  		t.Fatal(err)
   142  	}
   143  
   144  	for id := range repackedBlobs {
   145  		err = repo.Backend().Remove(context.TODO(), restic.Handle{Type: restic.DataFile, Name: id.String()})
   146  		if err != nil {
   147  			t.Fatal(err)
   148  		}
   149  	}
   150  }
   151  
   152  func saveIndex(t *testing.T, repo restic.Repository) {
   153  	if err := repo.SaveIndex(context.TODO()); err != nil {
   154  		t.Fatalf("repo.SaveIndex() %v", err)
   155  	}
   156  }
   157  
   158  func rebuildIndex(t *testing.T, repo restic.Repository) {
   159  	idx, _, err := index.New(context.TODO(), repo, restic.NewIDSet(), nil)
   160  	if err != nil {
   161  		t.Fatal(err)
   162  	}
   163  
   164  	err = repo.List(context.TODO(), restic.IndexFile, func(id restic.ID, size int64) error {
   165  		h := restic.Handle{
   166  			Type: restic.IndexFile,
   167  			Name: id.String(),
   168  		}
   169  		return repo.Backend().Remove(context.TODO(), h)
   170  	})
   171  	if err != nil {
   172  		t.Fatal(err)
   173  	}
   174  
   175  	_, err = idx.Save(context.TODO(), repo, nil)
   176  	if err != nil {
   177  		t.Fatal(err)
   178  	}
   179  }
   180  
   181  func reloadIndex(t *testing.T, repo restic.Repository) {
   182  	repo.SetIndex(repository.NewMasterIndex())
   183  	if err := repo.LoadIndex(context.TODO()); err != nil {
   184  		t.Fatalf("error loading new index: %v", err)
   185  	}
   186  }
   187  
   188  func TestRepack(t *testing.T) {
   189  	repo, cleanup := repository.TestRepository(t)
   190  	defer cleanup()
   191  
   192  	seed := rand.Int63()
   193  	rand.Seed(seed)
   194  	t.Logf("rand seed is %v", seed)
   195  
   196  	createRandomBlobs(t, repo, 100, 0.7)
   197  
   198  	packsBefore := listPacks(t, repo)
   199  
   200  	// Running repack on empty ID sets should not do anything at all.
   201  	repack(t, repo, nil, nil)
   202  
   203  	packsAfter := listPacks(t, repo)
   204  
   205  	if !packsAfter.Equals(packsBefore) {
   206  		t.Fatalf("packs are not equal, Repack modified something. Before:\n  %v\nAfter:\n  %v",
   207  			packsBefore, packsAfter)
   208  	}
   209  
   210  	saveIndex(t, repo)
   211  
   212  	removeBlobs, keepBlobs := selectBlobs(t, repo, 0.2)
   213  
   214  	removePacks := findPacksForBlobs(t, repo, removeBlobs)
   215  
   216  	repack(t, repo, removePacks, keepBlobs)
   217  	rebuildIndex(t, repo)
   218  	reloadIndex(t, repo)
   219  
   220  	packsAfter = listPacks(t, repo)
   221  	for id := range removePacks {
   222  		if packsAfter.Has(id) {
   223  			t.Errorf("pack %v still present although it should have been repacked and removed", id.Str())
   224  		}
   225  	}
   226  
   227  	idx := repo.Index()
   228  
   229  	for h := range keepBlobs {
   230  		list, found := idx.Lookup(h.ID, h.Type)
   231  		if !found {
   232  			t.Errorf("unable to find blob %v in repo", h.ID.Str())
   233  			continue
   234  		}
   235  
   236  		if len(list) != 1 {
   237  			t.Errorf("expected one pack in the list, got: %v", list)
   238  			continue
   239  		}
   240  
   241  		pb := list[0]
   242  
   243  		if removePacks.Has(pb.PackID) {
   244  			t.Errorf("lookup returned pack ID %v that should've been removed", pb.PackID)
   245  		}
   246  	}
   247  
   248  	for h := range removeBlobs {
   249  		if _, found := idx.Lookup(h.ID, h.Type); found {
   250  			t.Errorf("blob %v still contained in the repo", h)
   251  		}
   252  	}
   253  }