github.com/mckael/restic@v0.8.3/internal/index/index.go (about)

     1  // Package index contains various data structures for indexing content in a repository or backend.
     2  package index
     3  
     4  import (
     5  	"context"
     6  	"fmt"
     7  	"os"
     8  
     9  	"github.com/restic/restic/internal/debug"
    10  	"github.com/restic/restic/internal/list"
    11  	"github.com/restic/restic/internal/pack"
    12  	"github.com/restic/restic/internal/restic"
    13  	"github.com/restic/restic/internal/worker"
    14  
    15  	"github.com/restic/restic/internal/errors"
    16  )
    17  
// Pack contains information about the contents of a pack.
//
// ID is the pack's ID (the same value used as the key in Index.Packs), Size
// is the size value that was handed to AddPack, and Entries lists the blobs
// recorded for the pack. Note that Load records a Size of 0 for every pack,
// because the index JSON format (packJSON) has no size field.
type Pack struct {
	ID      restic.ID
	Size    int64
	Entries []restic.Blob
}
    24  
// Index contains information about blobs and packs stored in a repo.
//
// Packs maps each pack ID to its Pack description. IndexIDs holds the IDs of
// the index files that Load read while building the Index; an Index built by
// New leaves it empty.
type Index struct {
	Packs    map[restic.ID]Pack
	IndexIDs restic.IDSet
}
    30  
    31  func newIndex() *Index {
    32  	return &Index{
    33  		Packs:    make(map[restic.ID]Pack),
    34  		IndexIDs: restic.NewIDSet(),
    35  	}
    36  }
    37  
    38  // New creates a new index for repo from scratch. InvalidFiles contains all IDs
    39  // of files  that cannot be listed successfully.
    40  func New(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet, p *restic.Progress) (idx *Index, invalidFiles restic.IDs, err error) {
    41  	p.Start()
    42  	defer p.Done()
    43  
    44  	ch := make(chan worker.Job)
    45  	go list.AllPacks(ctx, repo, ignorePacks, ch)
    46  
    47  	idx = newIndex()
    48  
    49  	for job := range ch {
    50  		p.Report(restic.Stat{Blobs: 1})
    51  
    52  		j := job.Result.(list.Result)
    53  		if job.Error != nil {
    54  			cause := errors.Cause(job.Error)
    55  			if _, ok := cause.(pack.InvalidFileError); ok {
    56  				invalidFiles = append(invalidFiles, j.PackID())
    57  				continue
    58  			}
    59  
    60  			fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", j.PackID(), job.Error)
    61  			continue
    62  		}
    63  
    64  		debug.Log("pack %v contains %d blobs", j.PackID(), len(j.Entries()))
    65  
    66  		err := idx.AddPack(j.PackID(), j.Size(), j.Entries())
    67  		if err != nil {
    68  			return nil, nil, err
    69  		}
    70  	}
    71  
    72  	return idx, invalidFiles, nil
    73  }
    74  
// packJSON is the JSON form of a single pack inside an index file: the pack's
// ID ("id") together with the blobs it contains ("blobs").
type packJSON struct {
	ID    restic.ID  `json:"id"`
	Blobs []blobJSON `json:"blobs"`
}
    79  
// blobJSON is the JSON form of a single blob entry of a pack. Its fields
// mirror the ID, Type, Offset and Length fields of restic.Blob, which Load
// and Save copy from and to one by one.
type blobJSON struct {
	ID     restic.ID       `json:"id"`
	Type   restic.BlobType `json:"type"`
	Offset uint            `json:"offset"`
	Length uint            `json:"length"`
}
    86  
// indexJSON is the JSON form of a complete index file. Supersedes lists the
// IDs of index files this one supersedes and is left out of the JSON when
// empty; Packs holds the packs described by the file.
type indexJSON struct {
	Supersedes restic.IDs `json:"supersedes,omitempty"`
	Packs      []packJSON `json:"packs"`
}
    91  
    92  func loadIndexJSON(ctx context.Context, repo restic.Repository, id restic.ID) (*indexJSON, error) {
    93  	debug.Log("process index %v\n", id)
    94  
    95  	var idx indexJSON
    96  	err := repo.LoadJSONUnpacked(ctx, restic.IndexFile, id, &idx)
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  
   101  	return &idx, nil
   102  }
   103  
   104  // Load creates an index by loading all index files from the repo.
   105  func Load(ctx context.Context, repo restic.Repository, p *restic.Progress) (*Index, error) {
   106  	debug.Log("loading indexes")
   107  
   108  	p.Start()
   109  	defer p.Done()
   110  
   111  	supersedes := make(map[restic.ID]restic.IDSet)
   112  	results := make(map[restic.ID]map[restic.ID]Pack)
   113  
   114  	index := newIndex()
   115  
   116  	err := repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error {
   117  		p.Report(restic.Stat{Blobs: 1})
   118  
   119  		debug.Log("Load index %v", id)
   120  		idx, err := loadIndexJSON(ctx, repo, id)
   121  		if err != nil {
   122  			return err
   123  		}
   124  
   125  		res := make(map[restic.ID]Pack)
   126  		supersedes[id] = restic.NewIDSet()
   127  		for _, sid := range idx.Supersedes {
   128  			debug.Log("  index %v supersedes %v", id, sid)
   129  			supersedes[id].Insert(sid)
   130  		}
   131  
   132  		for _, jpack := range idx.Packs {
   133  			entries := make([]restic.Blob, 0, len(jpack.Blobs))
   134  			for _, blob := range jpack.Blobs {
   135  				entry := restic.Blob{
   136  					ID:     blob.ID,
   137  					Type:   blob.Type,
   138  					Offset: blob.Offset,
   139  					Length: blob.Length,
   140  				}
   141  				entries = append(entries, entry)
   142  			}
   143  
   144  			if err = index.AddPack(jpack.ID, 0, entries); err != nil {
   145  				return err
   146  			}
   147  		}
   148  
   149  		results[id] = res
   150  		index.IndexIDs.Insert(id)
   151  
   152  		return nil
   153  	})
   154  
   155  	if err != nil {
   156  		return nil, err
   157  	}
   158  
   159  	for superID, list := range supersedes {
   160  		for indexID := range list {
   161  			if _, ok := results[indexID]; !ok {
   162  				continue
   163  			}
   164  			debug.Log("  removing index %v, superseded by %v", indexID, superID)
   165  			fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", indexID.Str(), superID.Str())
   166  			delete(results, indexID)
   167  		}
   168  	}
   169  
   170  	return index, nil
   171  }
   172  
   173  // AddPack adds a pack to the index. If this pack is already in the index, an
   174  // error is returned.
   175  func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error {
   176  	if _, ok := idx.Packs[id]; ok {
   177  		return errors.Errorf("pack %v already present in the index", id.Str())
   178  	}
   179  
   180  	idx.Packs[id] = Pack{ID: id, Size: size, Entries: entries}
   181  
   182  	return nil
   183  }
   184  
   185  // RemovePack deletes a pack from the index.
   186  func (idx *Index) RemovePack(id restic.ID) error {
   187  	if _, ok := idx.Packs[id]; !ok {
   188  		return errors.Errorf("pack %v not found in the index", id.Str())
   189  	}
   190  
   191  	delete(idx.Packs, id)
   192  
   193  	return nil
   194  }
   195  
   196  // DuplicateBlobs returns a list of blobs that are stored more than once in the
   197  // repo.
   198  func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) {
   199  	dups = restic.NewBlobSet()
   200  	seen := restic.NewBlobSet()
   201  
   202  	for _, p := range idx.Packs {
   203  		for _, entry := range p.Entries {
   204  			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
   205  			if seen.Has(h) {
   206  				dups.Insert(h)
   207  			}
   208  			seen.Insert(h)
   209  		}
   210  	}
   211  
   212  	return dups
   213  }
   214  
   215  // PacksForBlobs returns the set of packs in which the blobs are contained.
   216  func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) {
   217  	packs = restic.NewIDSet()
   218  
   219  	for id, p := range idx.Packs {
   220  		for _, entry := range p.Entries {
   221  			if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) {
   222  				packs.Insert(id)
   223  			}
   224  		}
   225  	}
   226  
   227  	return packs
   228  }
   229  
// Location describes the location of a blob in a pack: PackID names the pack
// and the embedded restic.Blob is the pack's entry for the blob, as filled in
// by FindBlob.
type Location struct {
	PackID restic.ID
	restic.Blob
}
   235  
// ErrBlobNotFound is returned by FindBlob when the blob could not be found in
// the index.
var ErrBlobNotFound = errors.New("blob not found in index")
   239  
   240  // FindBlob returns a list of packs and positions the blob can be found in.
   241  func (idx *Index) FindBlob(h restic.BlobHandle) (result []Location, err error) {
   242  	for id, p := range idx.Packs {
   243  		for _, entry := range p.Entries {
   244  			if entry.ID.Equal(h.ID) && entry.Type == h.Type {
   245  				result = append(result, Location{
   246  					PackID: id,
   247  					Blob:   entry,
   248  				})
   249  			}
   250  		}
   251  	}
   252  
   253  	if len(result) == 0 {
   254  		return nil, ErrBlobNotFound
   255  	}
   256  
   257  	return result, nil
   258  }
   259  
// maxEntries is the number of packs Save collects into one index file before
// writing it out and starting the next one.
const maxEntries = 3000
   261  
   262  // Save writes the complete index to the repo.
   263  func (idx *Index) Save(ctx context.Context, repo restic.Repository, supersedes restic.IDs) (restic.IDs, error) {
   264  	debug.Log("pack files: %d\n", len(idx.Packs))
   265  
   266  	var indexIDs []restic.ID
   267  
   268  	packs := 0
   269  	jsonIDX := &indexJSON{
   270  		Supersedes: supersedes,
   271  		Packs:      make([]packJSON, 0, maxEntries),
   272  	}
   273  
   274  	for packID, pack := range idx.Packs {
   275  		debug.Log("%04d add pack %v with %d entries", packs, packID, len(pack.Entries))
   276  		b := make([]blobJSON, 0, len(pack.Entries))
   277  		for _, blob := range pack.Entries {
   278  			b = append(b, blobJSON{
   279  				ID:     blob.ID,
   280  				Type:   blob.Type,
   281  				Offset: blob.Offset,
   282  				Length: blob.Length,
   283  			})
   284  		}
   285  
   286  		p := packJSON{
   287  			ID:    packID,
   288  			Blobs: b,
   289  		}
   290  
   291  		jsonIDX.Packs = append(jsonIDX.Packs, p)
   292  
   293  		packs++
   294  		if packs == maxEntries {
   295  			id, err := repo.SaveJSONUnpacked(ctx, restic.IndexFile, jsonIDX)
   296  			if err != nil {
   297  				return nil, err
   298  			}
   299  			debug.Log("saved new index as %v", id)
   300  
   301  			indexIDs = append(indexIDs, id)
   302  			packs = 0
   303  			jsonIDX.Packs = jsonIDX.Packs[:0]
   304  		}
   305  	}
   306  
   307  	if packs > 0 {
   308  		id, err := repo.SaveJSONUnpacked(ctx, restic.IndexFile, jsonIDX)
   309  		if err != nil {
   310  			return nil, err
   311  		}
   312  		debug.Log("saved new index as %v", id)
   313  		indexIDs = append(indexIDs, id)
   314  	}
   315  
   316  	return indexIDs, nil
   317  }