github.com/advanderveer/restic@v0.8.1-0.20171209104529-42a8c19aaea6/internal/index/index.go (about) 1 // Package index contains various data structures for indexing content in a repository or backend. 2 package index 3 4 import ( 5 "context" 6 "fmt" 7 "os" 8 9 "github.com/restic/restic/internal/debug" 10 "github.com/restic/restic/internal/list" 11 "github.com/restic/restic/internal/pack" 12 "github.com/restic/restic/internal/restic" 13 "github.com/restic/restic/internal/worker" 14 15 "github.com/restic/restic/internal/errors" 16 ) 17 18 // Pack contains information about the contents of a pack. 19 type Pack struct { 20 ID restic.ID 21 Size int64 22 Entries []restic.Blob 23 } 24 25 // Index contains information about blobs and packs stored in a repo. 26 type Index struct { 27 Packs map[restic.ID]Pack 28 IndexIDs restic.IDSet 29 } 30 31 func newIndex() *Index { 32 return &Index{ 33 Packs: make(map[restic.ID]Pack), 34 IndexIDs: restic.NewIDSet(), 35 } 36 } 37 38 // New creates a new index for repo from scratch. InvalidFiles contains all IDs 39 // of files that cannot be listed successfully. 40 func New(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet, p *restic.Progress) (idx *Index, invalidFiles restic.IDs, err error) { 41 p.Start() 42 defer p.Done() 43 44 ch := make(chan worker.Job) 45 go list.AllPacks(ctx, repo, ignorePacks, ch) 46 47 idx = newIndex() 48 49 for job := range ch { 50 p.Report(restic.Stat{Blobs: 1}) 51 52 packID := job.Data.(restic.ID) 53 if job.Error != nil { 54 cause := errors.Cause(job.Error) 55 if _, ok := cause.(pack.InvalidFileError); ok { 56 invalidFiles = append(invalidFiles, packID) 57 continue 58 } 59 60 fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", packID.Str(), job.Error) 61 continue 62 } 63 64 j := job.Result.(list.Result) 65 66 debug.Log("pack %v contains %d blobs", packID.Str(), len(j.Entries())) 67 68 err := idx.AddPack(packID, j.Size(), j.Entries()) 69 if err != nil { 70 return nil, nil, err 71 } 72 } 73 74 return idx, invalidFiles, nil 75 } 76 77 type packJSON struct { 78 ID restic.ID `json:"id"` 79 Blobs []blobJSON `json:"blobs"` 80 } 81 82 type blobJSON struct { 83 ID restic.ID `json:"id"` 84 Type restic.BlobType `json:"type"` 85 Offset uint `json:"offset"` 86 Length uint `json:"length"` 87 } 88 89 type indexJSON struct { 90 Supersedes restic.IDs `json:"supersedes,omitempty"` 91 Packs []*packJSON `json:"packs"` 92 } 93 94 func loadIndexJSON(ctx context.Context, repo restic.Repository, id restic.ID) (*indexJSON, error) { 95 debug.Log("process index %v\n", id.Str()) 96 97 var idx indexJSON 98 err := repo.LoadJSONUnpacked(ctx, restic.IndexFile, id, &idx) 99 if err != nil { 100 return nil, err 101 } 102 103 return &idx, nil 104 } 105 106 // Load creates an index by loading all index files from the repo. 107 func Load(ctx context.Context, repo restic.Repository, p *restic.Progress) (*Index, error) { 108 debug.Log("loading indexes") 109 110 p.Start() 111 defer p.Done() 112 113 supersedes := make(map[restic.ID]restic.IDSet) 114 results := make(map[restic.ID]map[restic.ID]Pack) 115 116 index := newIndex() 117 118 for id := range repo.List(ctx, restic.IndexFile) { 119 p.Report(restic.Stat{Blobs: 1}) 120 121 debug.Log("Load index %v", id.Str()) 122 idx, err := loadIndexJSON(ctx, repo, id) 123 if err != nil { 124 return nil, err 125 } 126 127 res := make(map[restic.ID]Pack) 128 supersedes[id] = restic.NewIDSet() 129 for _, sid := range idx.Supersedes { 130 debug.Log(" index %v supersedes %v", id.Str(), sid) 131 supersedes[id].Insert(sid) 132 } 133 134 for _, jpack := range idx.Packs { 135 entries := make([]restic.Blob, 0, len(jpack.Blobs)) 136 for _, blob := range jpack.Blobs { 137 entry := restic.Blob{ 138 ID: blob.ID, 139 Type: blob.Type, 140 Offset: blob.Offset, 141 Length: blob.Length, 142 } 143 entries = append(entries, entry) 144 } 145 146 if err = index.AddPack(jpack.ID, 0, entries); err != nil { 147 return nil, err 148 } 149 } 150 151 results[id] = res 152 index.IndexIDs.Insert(id) 153 } 154 155 for superID, list := range supersedes { 156 for indexID := range list { 157 if _, ok := results[indexID]; !ok { 158 continue 159 } 160 debug.Log(" removing index %v, superseded by %v", indexID.Str(), superID.Str()) 161 fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", indexID.Str(), superID.Str()) 162 delete(results, indexID) 163 } 164 } 165 166 return index, nil 167 } 168 169 // AddPack adds a pack to the index. If this pack is already in the index, an 170 // error is returned. 171 func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error { 172 if _, ok := idx.Packs[id]; ok { 173 return errors.Errorf("pack %v already present in the index", id.Str()) 174 } 175 176 idx.Packs[id] = Pack{ID: id, Size: size, Entries: entries} 177 178 return nil 179 } 180 181 // RemovePack deletes a pack from the index. 182 func (idx *Index) RemovePack(id restic.ID) error { 183 if _, ok := idx.Packs[id]; !ok { 184 return errors.Errorf("pack %v not found in the index", id.Str()) 185 } 186 187 delete(idx.Packs, id) 188 189 return nil 190 } 191 192 // DuplicateBlobs returns a list of blobs that are stored more than once in the 193 // repo. 194 func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) { 195 dups = restic.NewBlobSet() 196 seen := restic.NewBlobSet() 197 198 for _, p := range idx.Packs { 199 for _, entry := range p.Entries { 200 h := restic.BlobHandle{ID: entry.ID, Type: entry.Type} 201 if seen.Has(h) { 202 dups.Insert(h) 203 } 204 seen.Insert(h) 205 } 206 } 207 208 return dups 209 } 210 211 // PacksForBlobs returns the set of packs in which the blobs are contained. 212 func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) { 213 packs = restic.NewIDSet() 214 215 for id, p := range idx.Packs { 216 for _, entry := range p.Entries { 217 if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) { 218 packs.Insert(id) 219 } 220 } 221 } 222 223 return packs 224 } 225 226 // Location describes the location of a blob in a pack. 227 type Location struct { 228 PackID restic.ID 229 restic.Blob 230 } 231 232 // ErrBlobNotFound is return by FindBlob when the blob could not be found in 233 // the index. 234 var ErrBlobNotFound = errors.New("blob not found in index") 235 236 // FindBlob returns a list of packs and positions the blob can be found in. 237 func (idx *Index) FindBlob(h restic.BlobHandle) (result []Location, err error) { 238 for id, p := range idx.Packs { 239 for _, entry := range p.Entries { 240 if entry.ID.Equal(h.ID) && entry.Type == h.Type { 241 result = append(result, Location{ 242 PackID: id, 243 Blob: entry, 244 }) 245 } 246 } 247 } 248 249 if len(result) == 0 { 250 return nil, ErrBlobNotFound 251 } 252 253 return result, nil 254 } 255 256 // Save writes the complete index to the repo. 257 func (idx *Index) Save(ctx context.Context, repo restic.Repository, supersedes restic.IDs) (restic.ID, error) { 258 packs := make(map[restic.ID][]restic.Blob, len(idx.Packs)) 259 for id, p := range idx.Packs { 260 packs[id] = p.Entries 261 } 262 263 return Save(ctx, repo, packs, supersedes) 264 } 265 266 // Save writes a new index containing the given packs. 267 func Save(ctx context.Context, repo restic.Repository, packs map[restic.ID][]restic.Blob, supersedes restic.IDs) (restic.ID, error) { 268 idx := &indexJSON{ 269 Supersedes: supersedes, 270 Packs: make([]*packJSON, 0, len(packs)), 271 } 272 273 for packID, blobs := range packs { 274 b := make([]blobJSON, 0, len(blobs)) 275 for _, blob := range blobs { 276 b = append(b, blobJSON{ 277 ID: blob.ID, 278 Type: blob.Type, 279 Offset: blob.Offset, 280 Length: blob.Length, 281 }) 282 } 283 284 p := &packJSON{ 285 ID: packID, 286 Blobs: b, 287 } 288 289 idx.Packs = append(idx.Packs, p) 290 } 291 292 return repo.SaveJSONUnpacked(ctx, restic.IndexFile, idx) 293 }