github.com/fawick/restic@v0.1.1-0.20171126184616-c02923fbfc79/internal/repository/repack_test.go (about) 1 package repository_test 2 3 import ( 4 "context" 5 "io" 6 "math/rand" 7 "testing" 8 9 "github.com/restic/restic/internal/index" 10 "github.com/restic/restic/internal/repository" 11 "github.com/restic/restic/internal/restic" 12 ) 13 14 func randomSize(min, max int) int { 15 return rand.Intn(max-min) + min 16 } 17 18 func random(t testing.TB, length int) []byte { 19 rd := restic.NewRandReader(rand.New(rand.NewSource(int64(length)))) 20 buf := make([]byte, length) 21 _, err := io.ReadFull(rd, buf) 22 if err != nil { 23 t.Fatalf("unable to read %d random bytes: %v", length, err) 24 } 25 26 return buf 27 } 28 29 func createRandomBlobs(t testing.TB, repo restic.Repository, blobs int, pData float32) { 30 for i := 0; i < blobs; i++ { 31 var ( 32 tpe restic.BlobType 33 length int 34 ) 35 36 if rand.Float32() < pData { 37 tpe = restic.DataBlob 38 length = randomSize(10*1024, 1024*1024) // 10KiB to 1MiB of data 39 } else { 40 tpe = restic.TreeBlob 41 length = randomSize(1*1024, 20*1024) // 1KiB to 20KiB 42 } 43 44 buf := random(t, length) 45 id := restic.Hash(buf) 46 47 if repo.Index().Has(id, restic.DataBlob) { 48 t.Errorf("duplicate blob %v/%v ignored", id, restic.DataBlob) 49 continue 50 } 51 52 _, err := repo.SaveBlob(context.TODO(), tpe, buf, id) 53 if err != nil { 54 t.Fatalf("SaveFrom() error %v", err) 55 } 56 57 if rand.Float32() < 0.2 { 58 if err = repo.Flush(); err != nil { 59 t.Fatalf("repo.Flush() returned error %v", err) 60 } 61 } 62 } 63 64 if err := repo.Flush(); err != nil { 65 t.Fatalf("repo.Flush() returned error %v", err) 66 } 67 } 68 69 // selectBlobs splits the list of all blobs randomly into two lists. A blob 70 // will be contained in the firstone ith probability p. 71 func selectBlobs(t *testing.T, repo restic.Repository, p float32) (list1, list2 restic.BlobSet) { 72 list1 = restic.NewBlobSet() 73 list2 = restic.NewBlobSet() 74 75 blobs := restic.NewBlobSet() 76 77 for id := range repo.List(context.TODO(), restic.DataFile) { 78 entries, _, err := repo.ListPack(context.TODO(), id) 79 if err != nil { 80 t.Fatalf("error listing pack %v: %v", id, err) 81 } 82 83 for _, entry := range entries { 84 h := restic.BlobHandle{ID: entry.ID, Type: entry.Type} 85 if blobs.Has(h) { 86 t.Errorf("ignoring duplicate blob %v", h) 87 continue 88 } 89 blobs.Insert(h) 90 91 if rand.Float32() <= p { 92 list1.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) 93 } else { 94 list2.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) 95 } 96 97 } 98 } 99 100 return list1, list2 101 } 102 103 func listPacks(t *testing.T, repo restic.Repository) restic.IDSet { 104 list := restic.NewIDSet() 105 for id := range repo.List(context.TODO(), restic.DataFile) { 106 list.Insert(id) 107 } 108 109 return list 110 } 111 112 func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSet) restic.IDSet { 113 packs := restic.NewIDSet() 114 115 idx := repo.Index() 116 for h := range blobs { 117 list, err := idx.Lookup(h.ID, h.Type) 118 if err != nil { 119 t.Fatal(err) 120 } 121 122 for _, pb := range list { 123 packs.Insert(pb.PackID) 124 } 125 } 126 127 return packs 128 } 129 130 func repack(t *testing.T, repo restic.Repository, packs restic.IDSet, blobs restic.BlobSet) { 131 repackedBlobs, err := repository.Repack(context.TODO(), repo, packs, blobs, nil) 132 if err != nil { 133 t.Fatal(err) 134 } 135 136 for id := range repackedBlobs { 137 err = repo.Backend().Remove(context.TODO(), restic.Handle{Type: restic.DataFile, Name: id.String()}) 138 if err != nil { 139 t.Fatal(err) 140 } 141 } 142 } 143 144 func saveIndex(t *testing.T, repo restic.Repository) { 145 if err := repo.SaveIndex(context.TODO()); err != nil { 146 t.Fatalf("repo.SaveIndex() %v", err) 147 } 148 } 149 150 func rebuildIndex(t *testing.T, repo restic.Repository) { 151 idx, _, err := index.New(context.TODO(), repo, restic.NewIDSet(), nil) 152 if err != nil { 153 t.Fatal(err) 154 } 155 156 for id := range repo.List(context.TODO(), restic.IndexFile) { 157 h := restic.Handle{ 158 Type: restic.IndexFile, 159 Name: id.String(), 160 } 161 err = repo.Backend().Remove(context.TODO(), h) 162 if err != nil { 163 t.Fatal(err) 164 } 165 } 166 167 _, err = idx.Save(context.TODO(), repo, nil) 168 if err != nil { 169 t.Fatal(err) 170 } 171 } 172 173 func reloadIndex(t *testing.T, repo restic.Repository) { 174 repo.SetIndex(repository.NewMasterIndex()) 175 if err := repo.LoadIndex(context.TODO()); err != nil { 176 t.Fatalf("error loading new index: %v", err) 177 } 178 } 179 180 func TestRepack(t *testing.T) { 181 repo, cleanup := repository.TestRepository(t) 182 defer cleanup() 183 184 createRandomBlobs(t, repo, 100, 0.7) 185 186 packsBefore := listPacks(t, repo) 187 188 // Running repack on empty ID sets should not do anything at all. 189 repack(t, repo, nil, nil) 190 191 packsAfter := listPacks(t, repo) 192 193 if !packsAfter.Equals(packsBefore) { 194 t.Fatalf("packs are not equal, Repack modified something. Before:\n %v\nAfter:\n %v", 195 packsBefore, packsAfter) 196 } 197 198 saveIndex(t, repo) 199 200 removeBlobs, keepBlobs := selectBlobs(t, repo, 0.2) 201 202 removePacks := findPacksForBlobs(t, repo, removeBlobs) 203 204 repack(t, repo, removePacks, keepBlobs) 205 rebuildIndex(t, repo) 206 reloadIndex(t, repo) 207 208 packsAfter = listPacks(t, repo) 209 for id := range removePacks { 210 if packsAfter.Has(id) { 211 t.Errorf("pack %v still present although it should have been repacked and removed", id.Str()) 212 } 213 } 214 215 idx := repo.Index() 216 217 for h := range keepBlobs { 218 list, err := idx.Lookup(h.ID, h.Type) 219 if err != nil { 220 t.Errorf("unable to find blob %v in repo", h.ID.Str()) 221 continue 222 } 223 224 if len(list) != 1 { 225 t.Errorf("expected one pack in the list, got: %v", list) 226 continue 227 } 228 229 pb := list[0] 230 231 if removePacks.Has(pb.PackID) { 232 t.Errorf("lookup returned pack ID %v that should've been removed", pb.PackID) 233 } 234 } 235 236 for h := range removeBlobs { 237 if _, err := idx.Lookup(h.ID, h.Type); err == nil { 238 t.Errorf("blob %v still contained in the repo", h) 239 } 240 } 241 }