github.com/fawick/restic@v0.1.1-0.20171126184616-c02923fbfc79/internal/repository/index.go (about) 1 package repository 2 3 import ( 4 "context" 5 "encoding/json" 6 "io" 7 "sync" 8 "time" 9 10 "github.com/restic/restic/internal/errors" 11 "github.com/restic/restic/internal/restic" 12 13 "github.com/restic/restic/internal/debug" 14 ) 15 16 // Index holds a lookup table for id -> pack. 17 type Index struct { 18 m sync.Mutex 19 pack map[restic.BlobHandle][]indexEntry 20 treePacks restic.IDs 21 22 final bool // set to true for all indexes read from the backend ("finalized") 23 id restic.ID // set to the ID of the index when it's finalized 24 supersedes restic.IDs 25 created time.Time 26 } 27 28 type indexEntry struct { 29 packID restic.ID 30 offset uint 31 length uint 32 } 33 34 // NewIndex returns a new index. 35 func NewIndex() *Index { 36 return &Index{ 37 pack: make(map[restic.BlobHandle][]indexEntry), 38 created: time.Now(), 39 } 40 } 41 42 func (idx *Index) store(blob restic.PackedBlob) { 43 newEntry := indexEntry{ 44 packID: blob.PackID, 45 offset: blob.Offset, 46 length: blob.Length, 47 } 48 h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} 49 idx.pack[h] = append(idx.pack[h], newEntry) 50 } 51 52 // Final returns true iff the index is already written to the repository, it is 53 // finalized. 54 func (idx *Index) Final() bool { 55 idx.m.Lock() 56 defer idx.m.Unlock() 57 58 return idx.final 59 } 60 61 const ( 62 indexMinBlobs = 20 63 indexMaxBlobs = 2000 64 indexMinAge = 2 * time.Minute 65 indexMaxAge = 15 * time.Minute 66 ) 67 68 // IndexFull returns true iff the index is "full enough" to be saved as a preliminary index. 69 var IndexFull = func(idx *Index) bool { 70 idx.m.Lock() 71 defer idx.m.Unlock() 72 73 debug.Log("checking whether index %p is full", idx) 74 75 packs := len(idx.pack) 76 age := time.Now().Sub(idx.created) 77 78 if age > indexMaxAge { 79 debug.Log("index %p is old enough", idx, age) 80 return true 81 } 82 83 if packs < indexMinBlobs || age < indexMinAge { 84 debug.Log("index %p only has %d packs or is too young (%v)", idx, packs, age) 85 return false 86 } 87 88 if packs > indexMaxBlobs { 89 debug.Log("index %p has %d packs", idx, packs) 90 return true 91 } 92 93 debug.Log("index %p is not full", idx) 94 return false 95 } 96 97 // Store remembers the id and pack in the index. An existing entry will be 98 // silently overwritten. 99 func (idx *Index) Store(blob restic.PackedBlob) { 100 idx.m.Lock() 101 defer idx.m.Unlock() 102 103 if idx.final { 104 panic("store new item in finalized index") 105 } 106 107 debug.Log("%v", blob) 108 109 idx.store(blob) 110 } 111 112 // Lookup queries the index for the blob ID and returns a restic.PackedBlob. 113 func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) { 114 idx.m.Lock() 115 defer idx.m.Unlock() 116 117 h := restic.BlobHandle{ID: id, Type: tpe} 118 119 if packs, ok := idx.pack[h]; ok { 120 blobs = make([]restic.PackedBlob, 0, len(packs)) 121 122 for _, p := range packs { 123 debug.Log("id %v found in pack %v at %d, length %d", 124 id.Str(), p.packID.Str(), p.offset, p.length) 125 126 blob := restic.PackedBlob{ 127 Blob: restic.Blob{ 128 Type: tpe, 129 Length: p.length, 130 ID: id, 131 Offset: p.offset, 132 }, 133 PackID: p.packID, 134 } 135 136 blobs = append(blobs, blob) 137 } 138 139 return blobs, nil 140 } 141 142 debug.Log("id %v not found", id.Str()) 143 return nil, errors.Errorf("id %v not found in index", id) 144 } 145 146 // ListPack returns a list of blobs contained in a pack. 147 func (idx *Index) ListPack(id restic.ID) (list []restic.PackedBlob) { 148 idx.m.Lock() 149 defer idx.m.Unlock() 150 151 for h, packList := range idx.pack { 152 for _, entry := range packList { 153 if entry.packID == id { 154 list = append(list, restic.PackedBlob{ 155 Blob: restic.Blob{ 156 ID: h.ID, 157 Type: h.Type, 158 Length: entry.length, 159 Offset: entry.offset, 160 }, 161 PackID: entry.packID, 162 }) 163 } 164 } 165 } 166 167 return list 168 } 169 170 // Has returns true iff the id is listed in the index. 171 func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool { 172 _, err := idx.Lookup(id, tpe) 173 if err == nil { 174 return true 175 } 176 177 return false 178 } 179 180 // LookupSize returns the length of the plaintext content of the blob with the 181 // given id. 182 func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, err error) { 183 blobs, err := idx.Lookup(id, tpe) 184 if err != nil { 185 return 0, err 186 } 187 188 return uint(restic.PlaintextLength(int(blobs[0].Length))), nil 189 } 190 191 // Supersedes returns the list of indexes this index supersedes, if any. 192 func (idx *Index) Supersedes() restic.IDs { 193 return idx.supersedes 194 } 195 196 // AddToSupersedes adds the ids to the list of indexes superseded by this 197 // index. If the index has already been finalized, an error is returned. 198 func (idx *Index) AddToSupersedes(ids ...restic.ID) error { 199 idx.m.Lock() 200 defer idx.m.Unlock() 201 202 if idx.final { 203 return errors.New("index already finalized") 204 } 205 206 idx.supersedes = append(idx.supersedes, ids...) 207 return nil 208 } 209 210 // Each returns a channel that yields all blobs known to the index. When the 211 // context is cancelled, the background goroutine terminates. This blocks any 212 // modification of the index. 213 func (idx *Index) Each(ctx context.Context) <-chan restic.PackedBlob { 214 idx.m.Lock() 215 216 ch := make(chan restic.PackedBlob) 217 218 go func() { 219 defer idx.m.Unlock() 220 defer func() { 221 close(ch) 222 }() 223 224 for h, packs := range idx.pack { 225 for _, blob := range packs { 226 select { 227 case <-ctx.Done(): 228 return 229 case ch <- restic.PackedBlob{ 230 Blob: restic.Blob{ 231 ID: h.ID, 232 Type: h.Type, 233 Offset: blob.offset, 234 Length: blob.length, 235 }, 236 PackID: blob.packID, 237 }: 238 } 239 } 240 } 241 }() 242 243 return ch 244 } 245 246 // Packs returns all packs in this index 247 func (idx *Index) Packs() restic.IDSet { 248 idx.m.Lock() 249 defer idx.m.Unlock() 250 251 packs := restic.NewIDSet() 252 for _, list := range idx.pack { 253 for _, entry := range list { 254 packs.Insert(entry.packID) 255 } 256 } 257 258 return packs 259 } 260 261 // Count returns the number of blobs of type t in the index. 262 func (idx *Index) Count(t restic.BlobType) (n uint) { 263 debug.Log("counting blobs of type %v", t) 264 idx.m.Lock() 265 defer idx.m.Unlock() 266 267 for h, list := range idx.pack { 268 if h.Type != t { 269 continue 270 } 271 272 n += uint(len(list)) 273 } 274 275 return 276 } 277 278 type packJSON struct { 279 ID restic.ID `json:"id"` 280 Blobs []blobJSON `json:"blobs"` 281 } 282 283 type blobJSON struct { 284 ID restic.ID `json:"id"` 285 Type restic.BlobType `json:"type"` 286 Offset uint `json:"offset"` 287 Length uint `json:"length"` 288 } 289 290 // generatePackList returns a list of packs. 291 func (idx *Index) generatePackList() ([]*packJSON, error) { 292 list := []*packJSON{} 293 packs := make(map[restic.ID]*packJSON) 294 295 for h, packedBlobs := range idx.pack { 296 for _, blob := range packedBlobs { 297 if blob.packID.IsNull() { 298 panic("null pack id") 299 } 300 301 debug.Log("handle blob %v", h) 302 303 if blob.packID.IsNull() { 304 debug.Log("blob %v has no packID! (offset %v, length %v)", 305 h, blob.offset, blob.length) 306 return nil, errors.Errorf("unable to serialize index: pack for blob %v hasn't been written yet", h) 307 } 308 309 // see if pack is already in map 310 p, ok := packs[blob.packID] 311 if !ok { 312 // else create new pack 313 p = &packJSON{ID: blob.packID} 314 315 // and append it to the list and map 316 list = append(list, p) 317 packs[p.ID] = p 318 } 319 320 // add blob 321 p.Blobs = append(p.Blobs, blobJSON{ 322 ID: h.ID, 323 Type: h.Type, 324 Offset: blob.offset, 325 Length: blob.length, 326 }) 327 } 328 } 329 330 debug.Log("done") 331 332 return list, nil 333 } 334 335 type jsonIndex struct { 336 Supersedes restic.IDs `json:"supersedes,omitempty"` 337 Packs []*packJSON `json:"packs"` 338 } 339 340 // Encode writes the JSON serialization of the index to the writer w. 341 func (idx *Index) Encode(w io.Writer) error { 342 debug.Log("encoding index") 343 idx.m.Lock() 344 defer idx.m.Unlock() 345 346 return idx.encode(w) 347 } 348 349 // encode writes the JSON serialization of the index to the writer w. 350 func (idx *Index) encode(w io.Writer) error { 351 debug.Log("encoding index") 352 353 list, err := idx.generatePackList() 354 if err != nil { 355 return err 356 } 357 358 enc := json.NewEncoder(w) 359 idxJSON := jsonIndex{ 360 Supersedes: idx.supersedes, 361 Packs: list, 362 } 363 return enc.Encode(idxJSON) 364 } 365 366 // Finalize sets the index to final and writes the JSON serialization to w. 367 func (idx *Index) Finalize(w io.Writer) error { 368 debug.Log("encoding index") 369 idx.m.Lock() 370 defer idx.m.Unlock() 371 372 idx.final = true 373 374 return idx.encode(w) 375 } 376 377 // ID returns the ID of the index, if available. If the index is not yet 378 // finalized, an error is returned. 379 func (idx *Index) ID() (restic.ID, error) { 380 idx.m.Lock() 381 defer idx.m.Unlock() 382 383 if !idx.final { 384 return restic.ID{}, errors.New("index not finalized") 385 } 386 387 return idx.id, nil 388 } 389 390 // SetID sets the ID the index has been written to. This requires that 391 // Finalize() has been called before, otherwise an error is returned. 392 func (idx *Index) SetID(id restic.ID) error { 393 idx.m.Lock() 394 defer idx.m.Unlock() 395 396 if !idx.final { 397 return errors.New("index is not final") 398 } 399 400 if !idx.id.IsNull() { 401 return errors.New("ID already set") 402 } 403 404 debug.Log("ID set to %v", id.Str()) 405 idx.id = id 406 407 return nil 408 } 409 410 // Dump writes the pretty-printed JSON representation of the index to w. 411 func (idx *Index) Dump(w io.Writer) error { 412 debug.Log("dumping index") 413 idx.m.Lock() 414 defer idx.m.Unlock() 415 416 list, err := idx.generatePackList() 417 if err != nil { 418 return err 419 } 420 421 outer := jsonIndex{ 422 Supersedes: idx.Supersedes(), 423 Packs: list, 424 } 425 426 buf, err := json.MarshalIndent(outer, "", " ") 427 if err != nil { 428 return err 429 } 430 431 _, err = w.Write(append(buf, '\n')) 432 if err != nil { 433 return errors.Wrap(err, "Write") 434 } 435 436 debug.Log("done") 437 438 return nil 439 } 440 441 // TreePacks returns a list of packs that contain only tree blobs. 442 func (idx *Index) TreePacks() restic.IDs { 443 return idx.treePacks 444 } 445 446 // isErrOldIndex returns true if the error may be caused by an old index 447 // format. 448 func isErrOldIndex(err error) bool { 449 if e, ok := err.(*json.UnmarshalTypeError); ok && e.Value == "array" { 450 return true 451 } 452 453 return false 454 } 455 456 // ErrOldIndexFormat means an index with the old format was detected. 457 var ErrOldIndexFormat = errors.New("index has old format") 458 459 // DecodeIndex loads and unserializes an index from rd. 460 func DecodeIndex(buf []byte) (idx *Index, err error) { 461 debug.Log("Start decoding index") 462 idxJSON := &jsonIndex{} 463 464 err = json.Unmarshal(buf, idxJSON) 465 if err != nil { 466 debug.Log("Error %v", err) 467 468 if isErrOldIndex(err) { 469 debug.Log("index is probably old format, trying that") 470 err = ErrOldIndexFormat 471 } 472 473 return nil, errors.Wrap(err, "Decode") 474 } 475 476 idx = NewIndex() 477 for _, pack := range idxJSON.Packs { 478 var data, tree bool 479 480 for _, blob := range pack.Blobs { 481 idx.store(restic.PackedBlob{ 482 Blob: restic.Blob{ 483 Type: blob.Type, 484 ID: blob.ID, 485 Offset: blob.Offset, 486 Length: blob.Length, 487 }, 488 PackID: pack.ID, 489 }) 490 491 switch blob.Type { 492 case restic.DataBlob: 493 data = true 494 case restic.TreeBlob: 495 tree = true 496 } 497 } 498 499 if !data && tree { 500 idx.treePacks = append(idx.treePacks, pack.ID) 501 } 502 } 503 idx.supersedes = idxJSON.Supersedes 504 idx.final = true 505 506 debug.Log("done") 507 return idx, nil 508 } 509 510 // DecodeOldIndex loads and unserializes an index in the old format from rd. 511 func DecodeOldIndex(buf []byte) (idx *Index, err error) { 512 debug.Log("Start decoding old index") 513 list := []*packJSON{} 514 515 err = json.Unmarshal(buf, &list) 516 if err != nil { 517 debug.Log("Error %#v", err) 518 return nil, errors.Wrap(err, "Decode") 519 } 520 521 idx = NewIndex() 522 for _, pack := range list { 523 var data, tree bool 524 525 for _, blob := range pack.Blobs { 526 idx.store(restic.PackedBlob{ 527 Blob: restic.Blob{ 528 Type: blob.Type, 529 ID: blob.ID, 530 Offset: blob.Offset, 531 Length: blob.Length, 532 }, 533 PackID: pack.ID, 534 }) 535 536 switch blob.Type { 537 case restic.DataBlob: 538 data = true 539 case restic.TreeBlob: 540 tree = true 541 } 542 } 543 544 if !data && tree { 545 idx.treePacks = append(idx.treePacks, pack.ID) 546 } 547 } 548 idx.final = true 549 550 debug.Log("done") 551 return idx, nil 552 } 553 554 // LoadIndexWithDecoder loads the index and decodes it with fn. 555 func LoadIndexWithDecoder(ctx context.Context, repo restic.Repository, id restic.ID, fn func([]byte) (*Index, error)) (idx *Index, err error) { 556 debug.Log("Loading index %v", id.Str()) 557 558 buf, err := repo.LoadAndDecrypt(ctx, restic.IndexFile, id) 559 if err != nil { 560 return nil, err 561 } 562 563 idx, err = fn(buf) 564 if err != nil { 565 debug.Log("error while decoding index %v: %v", id, err) 566 return nil, err 567 } 568 569 idx.id = id 570 571 return idx, nil 572 }