github.com/mckael/restic@v0.8.3/internal/index/index.go (about) 1 // Package index contains various data structures for indexing content in a repository or backend. 2 package index 3 4 import ( 5 "context" 6 "fmt" 7 "os" 8 9 "github.com/restic/restic/internal/debug" 10 "github.com/restic/restic/internal/list" 11 "github.com/restic/restic/internal/pack" 12 "github.com/restic/restic/internal/restic" 13 "github.com/restic/restic/internal/worker" 14 15 "github.com/restic/restic/internal/errors" 16 ) 17 18 // Pack contains information about the contents of a pack. 19 type Pack struct { 20 ID restic.ID 21 Size int64 22 Entries []restic.Blob 23 } 24 25 // Index contains information about blobs and packs stored in a repo. 26 type Index struct { 27 Packs map[restic.ID]Pack 28 IndexIDs restic.IDSet 29 } 30 31 func newIndex() *Index { 32 return &Index{ 33 Packs: make(map[restic.ID]Pack), 34 IndexIDs: restic.NewIDSet(), 35 } 36 } 37 38 // New creates a new index for repo from scratch. InvalidFiles contains all IDs 39 // of files that cannot be listed successfully. 40 func New(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet, p *restic.Progress) (idx *Index, invalidFiles restic.IDs, err error) { 41 p.Start() 42 defer p.Done() 43 44 ch := make(chan worker.Job) 45 go list.AllPacks(ctx, repo, ignorePacks, ch) 46 47 idx = newIndex() 48 49 for job := range ch { 50 p.Report(restic.Stat{Blobs: 1}) 51 52 j := job.Result.(list.Result) 53 if job.Error != nil { 54 cause := errors.Cause(job.Error) 55 if _, ok := cause.(pack.InvalidFileError); ok { 56 invalidFiles = append(invalidFiles, j.PackID()) 57 continue 58 } 59 60 fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", j.PackID(), job.Error) 61 continue 62 } 63 64 debug.Log("pack %v contains %d blobs", j.PackID(), len(j.Entries())) 65 66 err := idx.AddPack(j.PackID(), j.Size(), j.Entries()) 67 if err != nil { 68 return nil, nil, err 69 } 70 } 71 72 return idx, invalidFiles, nil 73 } 74 75 type packJSON struct { 76 ID restic.ID `json:"id"` 77 Blobs []blobJSON `json:"blobs"` 78 } 79 80 type blobJSON struct { 81 ID restic.ID `json:"id"` 82 Type restic.BlobType `json:"type"` 83 Offset uint `json:"offset"` 84 Length uint `json:"length"` 85 } 86 87 type indexJSON struct { 88 Supersedes restic.IDs `json:"supersedes,omitempty"` 89 Packs []packJSON `json:"packs"` 90 } 91 92 func loadIndexJSON(ctx context.Context, repo restic.Repository, id restic.ID) (*indexJSON, error) { 93 debug.Log("process index %v\n", id) 94 95 var idx indexJSON 96 err := repo.LoadJSONUnpacked(ctx, restic.IndexFile, id, &idx) 97 if err != nil { 98 return nil, err 99 } 100 101 return &idx, nil 102 } 103 104 // Load creates an index by loading all index files from the repo. 105 func Load(ctx context.Context, repo restic.Repository, p *restic.Progress) (*Index, error) { 106 debug.Log("loading indexes") 107 108 p.Start() 109 defer p.Done() 110 111 supersedes := make(map[restic.ID]restic.IDSet) 112 results := make(map[restic.ID]map[restic.ID]Pack) 113 114 index := newIndex() 115 116 err := repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error { 117 p.Report(restic.Stat{Blobs: 1}) 118 119 debug.Log("Load index %v", id) 120 idx, err := loadIndexJSON(ctx, repo, id) 121 if err != nil { 122 return err 123 } 124 125 res := make(map[restic.ID]Pack) 126 supersedes[id] = restic.NewIDSet() 127 for _, sid := range idx.Supersedes { 128 debug.Log(" index %v supersedes %v", id, sid) 129 supersedes[id].Insert(sid) 130 } 131 132 for _, jpack := range idx.Packs { 133 entries := make([]restic.Blob, 0, len(jpack.Blobs)) 134 for _, blob := range jpack.Blobs { 135 entry := restic.Blob{ 136 ID: blob.ID, 137 Type: blob.Type, 138 Offset: blob.Offset, 139 Length: blob.Length, 140 } 141 entries = append(entries, entry) 142 } 143 144 if err = index.AddPack(jpack.ID, 0, entries); err != nil { 145 return err 146 } 147 } 148 149 results[id] = res 150 index.IndexIDs.Insert(id) 151 152 return nil 153 }) 154 155 if err != nil { 156 return nil, err 157 } 158 159 for superID, list := range supersedes { 160 for indexID := range list { 161 if _, ok := results[indexID]; !ok { 162 continue 163 } 164 debug.Log(" removing index %v, superseded by %v", indexID, superID) 165 fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", indexID.Str(), superID.Str()) 166 delete(results, indexID) 167 } 168 } 169 170 return index, nil 171 } 172 173 // AddPack adds a pack to the index. If this pack is already in the index, an 174 // error is returned. 175 func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error { 176 if _, ok := idx.Packs[id]; ok { 177 return errors.Errorf("pack %v already present in the index", id.Str()) 178 } 179 180 idx.Packs[id] = Pack{ID: id, Size: size, Entries: entries} 181 182 return nil 183 } 184 185 // RemovePack deletes a pack from the index. 186 func (idx *Index) RemovePack(id restic.ID) error { 187 if _, ok := idx.Packs[id]; !ok { 188 return errors.Errorf("pack %v not found in the index", id.Str()) 189 } 190 191 delete(idx.Packs, id) 192 193 return nil 194 } 195 196 // DuplicateBlobs returns a list of blobs that are stored more than once in the 197 // repo. 198 func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) { 199 dups = restic.NewBlobSet() 200 seen := restic.NewBlobSet() 201 202 for _, p := range idx.Packs { 203 for _, entry := range p.Entries { 204 h := restic.BlobHandle{ID: entry.ID, Type: entry.Type} 205 if seen.Has(h) { 206 dups.Insert(h) 207 } 208 seen.Insert(h) 209 } 210 } 211 212 return dups 213 } 214 215 // PacksForBlobs returns the set of packs in which the blobs are contained. 216 func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) { 217 packs = restic.NewIDSet() 218 219 for id, p := range idx.Packs { 220 for _, entry := range p.Entries { 221 if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) { 222 packs.Insert(id) 223 } 224 } 225 } 226 227 return packs 228 } 229 230 // Location describes the location of a blob in a pack. 231 type Location struct { 232 PackID restic.ID 233 restic.Blob 234 } 235 236 // ErrBlobNotFound is return by FindBlob when the blob could not be found in 237 // the index. 238 var ErrBlobNotFound = errors.New("blob not found in index") 239 240 // FindBlob returns a list of packs and positions the blob can be found in. 241 func (idx *Index) FindBlob(h restic.BlobHandle) (result []Location, err error) { 242 for id, p := range idx.Packs { 243 for _, entry := range p.Entries { 244 if entry.ID.Equal(h.ID) && entry.Type == h.Type { 245 result = append(result, Location{ 246 PackID: id, 247 Blob: entry, 248 }) 249 } 250 } 251 } 252 253 if len(result) == 0 { 254 return nil, ErrBlobNotFound 255 } 256 257 return result, nil 258 } 259 260 const maxEntries = 3000 261 262 // Save writes the complete index to the repo. 263 func (idx *Index) Save(ctx context.Context, repo restic.Repository, supersedes restic.IDs) (restic.IDs, error) { 264 debug.Log("pack files: %d\n", len(idx.Packs)) 265 266 var indexIDs []restic.ID 267 268 packs := 0 269 jsonIDX := &indexJSON{ 270 Supersedes: supersedes, 271 Packs: make([]packJSON, 0, maxEntries), 272 } 273 274 for packID, pack := range idx.Packs { 275 debug.Log("%04d add pack %v with %d entries", packs, packID, len(pack.Entries)) 276 b := make([]blobJSON, 0, len(pack.Entries)) 277 for _, blob := range pack.Entries { 278 b = append(b, blobJSON{ 279 ID: blob.ID, 280 Type: blob.Type, 281 Offset: blob.Offset, 282 Length: blob.Length, 283 }) 284 } 285 286 p := packJSON{ 287 ID: packID, 288 Blobs: b, 289 } 290 291 jsonIDX.Packs = append(jsonIDX.Packs, p) 292 293 packs++ 294 if packs == maxEntries { 295 id, err := repo.SaveJSONUnpacked(ctx, restic.IndexFile, jsonIDX) 296 if err != nil { 297 return nil, err 298 } 299 debug.Log("saved new index as %v", id) 300 301 indexIDs = append(indexIDs, id) 302 packs = 0 303 jsonIDX.Packs = jsonIDX.Packs[:0] 304 } 305 } 306 307 if packs > 0 { 308 id, err := repo.SaveJSONUnpacked(ctx, restic.IndexFile, jsonIDX) 309 if err != nil { 310 return nil, err 311 } 312 debug.Log("saved new index as %v", id) 313 indexIDs = append(indexIDs, id) 314 } 315 316 return indexIDs, nil 317 }