github.com/advanderveer/restic@v0.8.1-0.20171209104529-42a8c19aaea6/internal/index/index.go (about)

     1  // Package index contains various data structures for indexing content in a repository or backend.
     2  package index
     3  
     4  import (
     5  	"context"
     6  	"fmt"
     7  	"os"
     8  
     9  	"github.com/restic/restic/internal/debug"
    10  	"github.com/restic/restic/internal/list"
    11  	"github.com/restic/restic/internal/pack"
    12  	"github.com/restic/restic/internal/restic"
    13  	"github.com/restic/restic/internal/worker"
    14  
    15  	"github.com/restic/restic/internal/errors"
    16  )
    17  
    18  // Pack contains information about the contents of a pack.
    19  type Pack struct {
    20  	ID      restic.ID
    21  	Size    int64
    22  	Entries []restic.Blob
    23  }
    24  
    25  // Index contains information about blobs and packs stored in a repo.
    26  type Index struct {
    27  	Packs    map[restic.ID]Pack
    28  	IndexIDs restic.IDSet
    29  }
    30  
    31  func newIndex() *Index {
    32  	return &Index{
    33  		Packs:    make(map[restic.ID]Pack),
    34  		IndexIDs: restic.NewIDSet(),
    35  	}
    36  }
    37  
    38  // New creates a new index for repo from scratch. InvalidFiles contains all IDs
    39  // of files  that cannot be listed successfully.
    40  func New(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet, p *restic.Progress) (idx *Index, invalidFiles restic.IDs, err error) {
    41  	p.Start()
    42  	defer p.Done()
    43  
    44  	ch := make(chan worker.Job)
    45  	go list.AllPacks(ctx, repo, ignorePacks, ch)
    46  
    47  	idx = newIndex()
    48  
    49  	for job := range ch {
    50  		p.Report(restic.Stat{Blobs: 1})
    51  
    52  		packID := job.Data.(restic.ID)
    53  		if job.Error != nil {
    54  			cause := errors.Cause(job.Error)
    55  			if _, ok := cause.(pack.InvalidFileError); ok {
    56  				invalidFiles = append(invalidFiles, packID)
    57  				continue
    58  			}
    59  
    60  			fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", packID.Str(), job.Error)
    61  			continue
    62  		}
    63  
    64  		j := job.Result.(list.Result)
    65  
    66  		debug.Log("pack %v contains %d blobs", packID.Str(), len(j.Entries()))
    67  
    68  		err := idx.AddPack(packID, j.Size(), j.Entries())
    69  		if err != nil {
    70  			return nil, nil, err
    71  		}
    72  	}
    73  
    74  	return idx, invalidFiles, nil
    75  }
    76  
    77  type packJSON struct {
    78  	ID    restic.ID  `json:"id"`
    79  	Blobs []blobJSON `json:"blobs"`
    80  }
    81  
    82  type blobJSON struct {
    83  	ID     restic.ID       `json:"id"`
    84  	Type   restic.BlobType `json:"type"`
    85  	Offset uint            `json:"offset"`
    86  	Length uint            `json:"length"`
    87  }
    88  
    89  type indexJSON struct {
    90  	Supersedes restic.IDs  `json:"supersedes,omitempty"`
    91  	Packs      []*packJSON `json:"packs"`
    92  }
    93  
    94  func loadIndexJSON(ctx context.Context, repo restic.Repository, id restic.ID) (*indexJSON, error) {
    95  	debug.Log("process index %v\n", id.Str())
    96  
    97  	var idx indexJSON
    98  	err := repo.LoadJSONUnpacked(ctx, restic.IndexFile, id, &idx)
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  
   103  	return &idx, nil
   104  }
   105  
   106  // Load creates an index by loading all index files from the repo.
   107  func Load(ctx context.Context, repo restic.Repository, p *restic.Progress) (*Index, error) {
   108  	debug.Log("loading indexes")
   109  
   110  	p.Start()
   111  	defer p.Done()
   112  
   113  	supersedes := make(map[restic.ID]restic.IDSet)
   114  	results := make(map[restic.ID]map[restic.ID]Pack)
   115  
   116  	index := newIndex()
   117  
   118  	for id := range repo.List(ctx, restic.IndexFile) {
   119  		p.Report(restic.Stat{Blobs: 1})
   120  
   121  		debug.Log("Load index %v", id.Str())
   122  		idx, err := loadIndexJSON(ctx, repo, id)
   123  		if err != nil {
   124  			return nil, err
   125  		}
   126  
   127  		res := make(map[restic.ID]Pack)
   128  		supersedes[id] = restic.NewIDSet()
   129  		for _, sid := range idx.Supersedes {
   130  			debug.Log("  index %v supersedes %v", id.Str(), sid)
   131  			supersedes[id].Insert(sid)
   132  		}
   133  
   134  		for _, jpack := range idx.Packs {
   135  			entries := make([]restic.Blob, 0, len(jpack.Blobs))
   136  			for _, blob := range jpack.Blobs {
   137  				entry := restic.Blob{
   138  					ID:     blob.ID,
   139  					Type:   blob.Type,
   140  					Offset: blob.Offset,
   141  					Length: blob.Length,
   142  				}
   143  				entries = append(entries, entry)
   144  			}
   145  
   146  			if err = index.AddPack(jpack.ID, 0, entries); err != nil {
   147  				return nil, err
   148  			}
   149  		}
   150  
   151  		results[id] = res
   152  		index.IndexIDs.Insert(id)
   153  	}
   154  
   155  	for superID, list := range supersedes {
   156  		for indexID := range list {
   157  			if _, ok := results[indexID]; !ok {
   158  				continue
   159  			}
   160  			debug.Log("  removing index %v, superseded by %v", indexID.Str(), superID.Str())
   161  			fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", indexID.Str(), superID.Str())
   162  			delete(results, indexID)
   163  		}
   164  	}
   165  
   166  	return index, nil
   167  }
   168  
   169  // AddPack adds a pack to the index. If this pack is already in the index, an
   170  // error is returned.
   171  func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error {
   172  	if _, ok := idx.Packs[id]; ok {
   173  		return errors.Errorf("pack %v already present in the index", id.Str())
   174  	}
   175  
   176  	idx.Packs[id] = Pack{ID: id, Size: size, Entries: entries}
   177  
   178  	return nil
   179  }
   180  
   181  // RemovePack deletes a pack from the index.
   182  func (idx *Index) RemovePack(id restic.ID) error {
   183  	if _, ok := idx.Packs[id]; !ok {
   184  		return errors.Errorf("pack %v not found in the index", id.Str())
   185  	}
   186  
   187  	delete(idx.Packs, id)
   188  
   189  	return nil
   190  }
   191  
   192  // DuplicateBlobs returns a list of blobs that are stored more than once in the
   193  // repo.
   194  func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) {
   195  	dups = restic.NewBlobSet()
   196  	seen := restic.NewBlobSet()
   197  
   198  	for _, p := range idx.Packs {
   199  		for _, entry := range p.Entries {
   200  			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
   201  			if seen.Has(h) {
   202  				dups.Insert(h)
   203  			}
   204  			seen.Insert(h)
   205  		}
   206  	}
   207  
   208  	return dups
   209  }
   210  
   211  // PacksForBlobs returns the set of packs in which the blobs are contained.
   212  func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) {
   213  	packs = restic.NewIDSet()
   214  
   215  	for id, p := range idx.Packs {
   216  		for _, entry := range p.Entries {
   217  			if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) {
   218  				packs.Insert(id)
   219  			}
   220  		}
   221  	}
   222  
   223  	return packs
   224  }
   225  
   226  // Location describes the location of a blob in a pack.
   227  type Location struct {
   228  	PackID restic.ID
   229  	restic.Blob
   230  }
   231  
   232  // ErrBlobNotFound is return by FindBlob when the blob could not be found in
   233  // the index.
   234  var ErrBlobNotFound = errors.New("blob not found in index")
   235  
   236  // FindBlob returns a list of packs and positions the blob can be found in.
   237  func (idx *Index) FindBlob(h restic.BlobHandle) (result []Location, err error) {
   238  	for id, p := range idx.Packs {
   239  		for _, entry := range p.Entries {
   240  			if entry.ID.Equal(h.ID) && entry.Type == h.Type {
   241  				result = append(result, Location{
   242  					PackID: id,
   243  					Blob:   entry,
   244  				})
   245  			}
   246  		}
   247  	}
   248  
   249  	if len(result) == 0 {
   250  		return nil, ErrBlobNotFound
   251  	}
   252  
   253  	return result, nil
   254  }
   255  
   256  // Save writes the complete index to the repo.
   257  func (idx *Index) Save(ctx context.Context, repo restic.Repository, supersedes restic.IDs) (restic.ID, error) {
   258  	packs := make(map[restic.ID][]restic.Blob, len(idx.Packs))
   259  	for id, p := range idx.Packs {
   260  		packs[id] = p.Entries
   261  	}
   262  
   263  	return Save(ctx, repo, packs, supersedes)
   264  }
   265  
   266  // Save writes a new index containing the given packs.
   267  func Save(ctx context.Context, repo restic.Repository, packs map[restic.ID][]restic.Blob, supersedes restic.IDs) (restic.ID, error) {
   268  	idx := &indexJSON{
   269  		Supersedes: supersedes,
   270  		Packs:      make([]*packJSON, 0, len(packs)),
   271  	}
   272  
   273  	for packID, blobs := range packs {
   274  		b := make([]blobJSON, 0, len(blobs))
   275  		for _, blob := range blobs {
   276  			b = append(b, blobJSON{
   277  				ID:     blob.ID,
   278  				Type:   blob.Type,
   279  				Offset: blob.Offset,
   280  				Length: blob.Length,
   281  			})
   282  		}
   283  
   284  		p := &packJSON{
   285  			ID:    packID,
   286  			Blobs: b,
   287  		}
   288  
   289  		idx.Packs = append(idx.Packs, p)
   290  	}
   291  
   292  	return repo.SaveJSONUnpacked(ctx, restic.IndexFile, idx)
   293  }