github.com/fawick/restic@v0.1.1-0.20171126184616-c02923fbfc79/internal/repository/index.go (about)

     1  package repository
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"io"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/restic/restic/internal/errors"
    11  	"github.com/restic/restic/internal/restic"
    12  
    13  	"github.com/restic/restic/internal/debug"
    14  )
    15  
// Index holds a lookup table for id -> pack.
//
// pack maps each blob handle to every location recorded for it; a handle can
// have more than one entry. treePacks lists the packs that were found to
// contain tree blobs but no data blobs while the index was decoded. created
// is set by NewIndex and is used by IndexFull to compute the index's age.
//
// Most methods lock m before touching the other fields; Supersedes and
// TreePacks return their field without taking the lock.
type Index struct {
	m         sync.Mutex
	pack      map[restic.BlobHandle][]indexEntry
	treePacks restic.IDs

	final      bool      // set to true for all indexes read from the backend ("finalized")
	id         restic.ID // set to the ID of the index when it's finalized
	supersedes restic.IDs
	created    time.Time
}
    27  
// indexEntry is one location of a blob: the ID of the pack it was recorded
// for, plus the offset and length copied from the restic.PackedBlob that was
// stored. LookupSize passes length through restic.PlaintextLength, so length
// is presumably the stored (encrypted) size -- confirm against the pack
// format.
type indexEntry struct {
	packID restic.ID
	offset uint
	length uint
}
    33  
    34  // NewIndex returns a new index.
    35  func NewIndex() *Index {
    36  	return &Index{
    37  		pack:    make(map[restic.BlobHandle][]indexEntry),
    38  		created: time.Now(),
    39  	}
    40  }
    41  
    42  func (idx *Index) store(blob restic.PackedBlob) {
    43  	newEntry := indexEntry{
    44  		packID: blob.PackID,
    45  		offset: blob.Offset,
    46  		length: blob.Length,
    47  	}
    48  	h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
    49  	idx.pack[h] = append(idx.pack[h], newEntry)
    50  }
    51  
    52  // Final returns true iff the index is already written to the repository, it is
    53  // finalized.
    54  func (idx *Index) Final() bool {
    55  	idx.m.Lock()
    56  	defer idx.m.Unlock()
    57  
    58  	return idx.final
    59  }
    60  
// Thresholds used by IndexFull, checked in this order:
//   - an index older than indexMaxAge is always full;
//   - otherwise an index with fewer than indexMinBlobs entries, or younger
//     than indexMinAge, is never full;
//   - otherwise the index is full once it has more than indexMaxBlobs entries.
const (
	indexMinBlobs = 20
	indexMaxBlobs = 2000
	indexMinAge   = 2 * time.Minute
	indexMaxAge   = 15 * time.Minute
)
    67  
    68  // IndexFull returns true iff the index is "full enough" to be saved as a preliminary index.
    69  var IndexFull = func(idx *Index) bool {
    70  	idx.m.Lock()
    71  	defer idx.m.Unlock()
    72  
    73  	debug.Log("checking whether index %p is full", idx)
    74  
    75  	packs := len(idx.pack)
    76  	age := time.Now().Sub(idx.created)
    77  
    78  	if age > indexMaxAge {
    79  		debug.Log("index %p is old enough", idx, age)
    80  		return true
    81  	}
    82  
    83  	if packs < indexMinBlobs || age < indexMinAge {
    84  		debug.Log("index %p only has %d packs or is too young (%v)", idx, packs, age)
    85  		return false
    86  	}
    87  
    88  	if packs > indexMaxBlobs {
    89  		debug.Log("index %p has %d packs", idx, packs)
    90  		return true
    91  	}
    92  
    93  	debug.Log("index %p is not full", idx)
    94  	return false
    95  }
    96  
    97  // Store remembers the id and pack in the index. An existing entry will be
    98  // silently overwritten.
    99  func (idx *Index) Store(blob restic.PackedBlob) {
   100  	idx.m.Lock()
   101  	defer idx.m.Unlock()
   102  
   103  	if idx.final {
   104  		panic("store new item in finalized index")
   105  	}
   106  
   107  	debug.Log("%v", blob)
   108  
   109  	idx.store(blob)
   110  }
   111  
   112  // Lookup queries the index for the blob ID and returns a restic.PackedBlob.
   113  func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) {
   114  	idx.m.Lock()
   115  	defer idx.m.Unlock()
   116  
   117  	h := restic.BlobHandle{ID: id, Type: tpe}
   118  
   119  	if packs, ok := idx.pack[h]; ok {
   120  		blobs = make([]restic.PackedBlob, 0, len(packs))
   121  
   122  		for _, p := range packs {
   123  			debug.Log("id %v found in pack %v at %d, length %d",
   124  				id.Str(), p.packID.Str(), p.offset, p.length)
   125  
   126  			blob := restic.PackedBlob{
   127  				Blob: restic.Blob{
   128  					Type:   tpe,
   129  					Length: p.length,
   130  					ID:     id,
   131  					Offset: p.offset,
   132  				},
   133  				PackID: p.packID,
   134  			}
   135  
   136  			blobs = append(blobs, blob)
   137  		}
   138  
   139  		return blobs, nil
   140  	}
   141  
   142  	debug.Log("id %v not found", id.Str())
   143  	return nil, errors.Errorf("id %v not found in index", id)
   144  }
   145  
   146  // ListPack returns a list of blobs contained in a pack.
   147  func (idx *Index) ListPack(id restic.ID) (list []restic.PackedBlob) {
   148  	idx.m.Lock()
   149  	defer idx.m.Unlock()
   150  
   151  	for h, packList := range idx.pack {
   152  		for _, entry := range packList {
   153  			if entry.packID == id {
   154  				list = append(list, restic.PackedBlob{
   155  					Blob: restic.Blob{
   156  						ID:     h.ID,
   157  						Type:   h.Type,
   158  						Length: entry.length,
   159  						Offset: entry.offset,
   160  					},
   161  					PackID: entry.packID,
   162  				})
   163  			}
   164  		}
   165  	}
   166  
   167  	return list
   168  }
   169  
   170  // Has returns true iff the id is listed in the index.
   171  func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool {
   172  	_, err := idx.Lookup(id, tpe)
   173  	if err == nil {
   174  		return true
   175  	}
   176  
   177  	return false
   178  }
   179  
   180  // LookupSize returns the length of the plaintext content of the blob with the
   181  // given id.
   182  func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, err error) {
   183  	blobs, err := idx.Lookup(id, tpe)
   184  	if err != nil {
   185  		return 0, err
   186  	}
   187  
   188  	return uint(restic.PlaintextLength(int(blobs[0].Length))), nil
   189  }
   190  
// Supersedes returns the list of indexes this index supersedes, if any.
//
// The internal slice is returned as is (no copy), and idx.m is not locked
// here; Dump calls this method while it already holds the lock.
func (idx *Index) Supersedes() restic.IDs {
	return idx.supersedes
}
   195  
   196  // AddToSupersedes adds the ids to the list of indexes superseded by this
   197  // index. If the index has already been finalized, an error is returned.
   198  func (idx *Index) AddToSupersedes(ids ...restic.ID) error {
   199  	idx.m.Lock()
   200  	defer idx.m.Unlock()
   201  
   202  	if idx.final {
   203  		return errors.New("index already finalized")
   204  	}
   205  
   206  	idx.supersedes = append(idx.supersedes, ids...)
   207  	return nil
   208  }
   209  
   210  // Each returns a channel that yields all blobs known to the index. When the
   211  // context is cancelled, the background goroutine terminates. This blocks any
   212  // modification of the index.
   213  func (idx *Index) Each(ctx context.Context) <-chan restic.PackedBlob {
   214  	idx.m.Lock()
   215  
   216  	ch := make(chan restic.PackedBlob)
   217  
   218  	go func() {
   219  		defer idx.m.Unlock()
   220  		defer func() {
   221  			close(ch)
   222  		}()
   223  
   224  		for h, packs := range idx.pack {
   225  			for _, blob := range packs {
   226  				select {
   227  				case <-ctx.Done():
   228  					return
   229  				case ch <- restic.PackedBlob{
   230  					Blob: restic.Blob{
   231  						ID:     h.ID,
   232  						Type:   h.Type,
   233  						Offset: blob.offset,
   234  						Length: blob.length,
   235  					},
   236  					PackID: blob.packID,
   237  				}:
   238  				}
   239  			}
   240  		}
   241  	}()
   242  
   243  	return ch
   244  }
   245  
   246  // Packs returns all packs in this index
   247  func (idx *Index) Packs() restic.IDSet {
   248  	idx.m.Lock()
   249  	defer idx.m.Unlock()
   250  
   251  	packs := restic.NewIDSet()
   252  	for _, list := range idx.pack {
   253  		for _, entry := range list {
   254  			packs.Insert(entry.packID)
   255  		}
   256  	}
   257  
   258  	return packs
   259  }
   260  
   261  // Count returns the number of blobs of type t in the index.
   262  func (idx *Index) Count(t restic.BlobType) (n uint) {
   263  	debug.Log("counting blobs of type %v", t)
   264  	idx.m.Lock()
   265  	defer idx.m.Unlock()
   266  
   267  	for h, list := range idx.pack {
   268  		if h.Type != t {
   269  			continue
   270  		}
   271  
   272  		n += uint(len(list))
   273  	}
   274  
   275  	return
   276  }
   277  
// packJSON is the JSON representation of one pack in a serialized index: the
// pack's ID and the blobs listed for it.
type packJSON struct {
	ID    restic.ID  `json:"id"`
	Blobs []blobJSON `json:"blobs"`
}
   282  
// blobJSON is the JSON representation of one blob inside a packJSON: the
// blob's ID and type together with the offset and length recorded for it in
// the index.
type blobJSON struct {
	ID     restic.ID       `json:"id"`
	Type   restic.BlobType `json:"type"`
	Offset uint            `json:"offset"`
	Length uint            `json:"length"`
}
   289  
   290  // generatePackList returns a list of packs.
   291  func (idx *Index) generatePackList() ([]*packJSON, error) {
   292  	list := []*packJSON{}
   293  	packs := make(map[restic.ID]*packJSON)
   294  
   295  	for h, packedBlobs := range idx.pack {
   296  		for _, blob := range packedBlobs {
   297  			if blob.packID.IsNull() {
   298  				panic("null pack id")
   299  			}
   300  
   301  			debug.Log("handle blob %v", h)
   302  
   303  			if blob.packID.IsNull() {
   304  				debug.Log("blob %v has no packID! (offset %v, length %v)",
   305  					h, blob.offset, blob.length)
   306  				return nil, errors.Errorf("unable to serialize index: pack for blob %v hasn't been written yet", h)
   307  			}
   308  
   309  			// see if pack is already in map
   310  			p, ok := packs[blob.packID]
   311  			if !ok {
   312  				// else create new pack
   313  				p = &packJSON{ID: blob.packID}
   314  
   315  				// and append it to the list and map
   316  				list = append(list, p)
   317  				packs[p.ID] = p
   318  			}
   319  
   320  			// add blob
   321  			p.Blobs = append(p.Blobs, blobJSON{
   322  				ID:     h.ID,
   323  				Type:   h.Type,
   324  				Offset: blob.offset,
   325  				Length: blob.length,
   326  			})
   327  		}
   328  	}
   329  
   330  	debug.Log("done")
   331  
   332  	return list, nil
   333  }
   334  
// jsonIndex is the top-level JSON document of a serialized index: the IDs of
// the indexes it supersedes (left out of the output when empty) and the list
// of packs.
type jsonIndex struct {
	Supersedes restic.IDs  `json:"supersedes,omitempty"`
	Packs      []*packJSON `json:"packs"`
}
   339  
   340  // Encode writes the JSON serialization of the index to the writer w.
   341  func (idx *Index) Encode(w io.Writer) error {
   342  	debug.Log("encoding index")
   343  	idx.m.Lock()
   344  	defer idx.m.Unlock()
   345  
   346  	return idx.encode(w)
   347  }
   348  
   349  // encode writes the JSON serialization of the index to the writer w.
   350  func (idx *Index) encode(w io.Writer) error {
   351  	debug.Log("encoding index")
   352  
   353  	list, err := idx.generatePackList()
   354  	if err != nil {
   355  		return err
   356  	}
   357  
   358  	enc := json.NewEncoder(w)
   359  	idxJSON := jsonIndex{
   360  		Supersedes: idx.supersedes,
   361  		Packs:      list,
   362  	}
   363  	return enc.Encode(idxJSON)
   364  }
   365  
   366  // Finalize sets the index to final and writes the JSON serialization to w.
   367  func (idx *Index) Finalize(w io.Writer) error {
   368  	debug.Log("encoding index")
   369  	idx.m.Lock()
   370  	defer idx.m.Unlock()
   371  
   372  	idx.final = true
   373  
   374  	return idx.encode(w)
   375  }
   376  
   377  // ID returns the ID of the index, if available. If the index is not yet
   378  // finalized, an error is returned.
   379  func (idx *Index) ID() (restic.ID, error) {
   380  	idx.m.Lock()
   381  	defer idx.m.Unlock()
   382  
   383  	if !idx.final {
   384  		return restic.ID{}, errors.New("index not finalized")
   385  	}
   386  
   387  	return idx.id, nil
   388  }
   389  
   390  // SetID sets the ID the index has been written to. This requires that
   391  // Finalize() has been called before, otherwise an error is returned.
   392  func (idx *Index) SetID(id restic.ID) error {
   393  	idx.m.Lock()
   394  	defer idx.m.Unlock()
   395  
   396  	if !idx.final {
   397  		return errors.New("index is not final")
   398  	}
   399  
   400  	if !idx.id.IsNull() {
   401  		return errors.New("ID already set")
   402  	}
   403  
   404  	debug.Log("ID set to %v", id.Str())
   405  	idx.id = id
   406  
   407  	return nil
   408  }
   409  
   410  // Dump writes the pretty-printed JSON representation of the index to w.
   411  func (idx *Index) Dump(w io.Writer) error {
   412  	debug.Log("dumping index")
   413  	idx.m.Lock()
   414  	defer idx.m.Unlock()
   415  
   416  	list, err := idx.generatePackList()
   417  	if err != nil {
   418  		return err
   419  	}
   420  
   421  	outer := jsonIndex{
   422  		Supersedes: idx.Supersedes(),
   423  		Packs:      list,
   424  	}
   425  
   426  	buf, err := json.MarshalIndent(outer, "", "  ")
   427  	if err != nil {
   428  		return err
   429  	}
   430  
   431  	_, err = w.Write(append(buf, '\n'))
   432  	if err != nil {
   433  		return errors.Wrap(err, "Write")
   434  	}
   435  
   436  	debug.Log("done")
   437  
   438  	return nil
   439  }
   440  
// TreePacks returns a list of packs that contain only tree blobs.
//
// The list is filled in by DecodeIndex and DecodeOldIndex, which add a pack
// when it lists at least one tree blob and no data blob. The internal slice
// is returned as is (no copy), and idx.m is not locked here.
func (idx *Index) TreePacks() restic.IDs {
	return idx.treePacks
}
   445  
   446  // isErrOldIndex returns true if the error may be caused by an old index
   447  // format.
   448  func isErrOldIndex(err error) bool {
   449  	if e, ok := err.(*json.UnmarshalTypeError); ok && e.Value == "array" {
   450  		return true
   451  	}
   452  
   453  	return false
   454  }
   455  
// ErrOldIndexFormat means an index with the old format was detected.
// DecodeIndex wraps it (errors.Wrap) when the JSON decoder reports an array
// where the index document was expected.
var ErrOldIndexFormat = errors.New("index has old format")
   458  
   459  // DecodeIndex loads and unserializes an index from rd.
   460  func DecodeIndex(buf []byte) (idx *Index, err error) {
   461  	debug.Log("Start decoding index")
   462  	idxJSON := &jsonIndex{}
   463  
   464  	err = json.Unmarshal(buf, idxJSON)
   465  	if err != nil {
   466  		debug.Log("Error %v", err)
   467  
   468  		if isErrOldIndex(err) {
   469  			debug.Log("index is probably old format, trying that")
   470  			err = ErrOldIndexFormat
   471  		}
   472  
   473  		return nil, errors.Wrap(err, "Decode")
   474  	}
   475  
   476  	idx = NewIndex()
   477  	for _, pack := range idxJSON.Packs {
   478  		var data, tree bool
   479  
   480  		for _, blob := range pack.Blobs {
   481  			idx.store(restic.PackedBlob{
   482  				Blob: restic.Blob{
   483  					Type:   blob.Type,
   484  					ID:     blob.ID,
   485  					Offset: blob.Offset,
   486  					Length: blob.Length,
   487  				},
   488  				PackID: pack.ID,
   489  			})
   490  
   491  			switch blob.Type {
   492  			case restic.DataBlob:
   493  				data = true
   494  			case restic.TreeBlob:
   495  				tree = true
   496  			}
   497  		}
   498  
   499  		if !data && tree {
   500  			idx.treePacks = append(idx.treePacks, pack.ID)
   501  		}
   502  	}
   503  	idx.supersedes = idxJSON.Supersedes
   504  	idx.final = true
   505  
   506  	debug.Log("done")
   507  	return idx, nil
   508  }
   509  
   510  // DecodeOldIndex loads and unserializes an index in the old format from rd.
   511  func DecodeOldIndex(buf []byte) (idx *Index, err error) {
   512  	debug.Log("Start decoding old index")
   513  	list := []*packJSON{}
   514  
   515  	err = json.Unmarshal(buf, &list)
   516  	if err != nil {
   517  		debug.Log("Error %#v", err)
   518  		return nil, errors.Wrap(err, "Decode")
   519  	}
   520  
   521  	idx = NewIndex()
   522  	for _, pack := range list {
   523  		var data, tree bool
   524  
   525  		for _, blob := range pack.Blobs {
   526  			idx.store(restic.PackedBlob{
   527  				Blob: restic.Blob{
   528  					Type:   blob.Type,
   529  					ID:     blob.ID,
   530  					Offset: blob.Offset,
   531  					Length: blob.Length,
   532  				},
   533  				PackID: pack.ID,
   534  			})
   535  
   536  			switch blob.Type {
   537  			case restic.DataBlob:
   538  				data = true
   539  			case restic.TreeBlob:
   540  				tree = true
   541  			}
   542  		}
   543  
   544  		if !data && tree {
   545  			idx.treePacks = append(idx.treePacks, pack.ID)
   546  		}
   547  	}
   548  	idx.final = true
   549  
   550  	debug.Log("done")
   551  	return idx, nil
   552  }
   553  
   554  // LoadIndexWithDecoder loads the index and decodes it with fn.
   555  func LoadIndexWithDecoder(ctx context.Context, repo restic.Repository, id restic.ID, fn func([]byte) (*Index, error)) (idx *Index, err error) {
   556  	debug.Log("Loading index %v", id.Str())
   557  
   558  	buf, err := repo.LoadAndDecrypt(ctx, restic.IndexFile, id)
   559  	if err != nil {
   560  		return nil, err
   561  	}
   562  
   563  	idx, err = fn(buf)
   564  	if err != nil {
   565  		debug.Log("error while decoding index %v: %v", id, err)
   566  		return nil, err
   567  	}
   568  
   569  	idx.id = id
   570  
   571  	return idx, nil
   572  }