github.com/outbrain/consul@v1.4.5/snapshot/archive.go (about)

     1  // The archive utilities manage the internal format of a snapshot, which is a
     2  // tar file with the following contents:
     3  //
     4  // meta.json  - JSON-encoded snapshot metadata from Raft
     5  // state.bin  - Encoded snapshot data from Raft
     6  // SHA256SUMS - SHA-256 sums of the above two files
     7  //
     8  // The integrity information is automatically created and checked, and a failure
     9  // there just looks like an error to the caller.
    10  package snapshot
    11  
import (
	"archive/tar"
	"bufio"
	"bytes"
	"crypto/sha256"
	"encoding/json"
	"fmt"
	"hash"
	"io"
	"io/ioutil"
	"sort"
	"time"

	"github.com/hashicorp/raft"
)
    26  
    27  // hashList manages a list of filenames and their hashes.
    28  type hashList struct {
    29  	hashes map[string]hash.Hash
    30  }
    31  
    32  // newHashList returns a new hashList.
    33  func newHashList() *hashList {
    34  	return &hashList{
    35  		hashes: make(map[string]hash.Hash),
    36  	}
    37  }
    38  
    39  // Add creates a new hash for the given file.
    40  func (hl *hashList) Add(file string) hash.Hash {
    41  	if existing, ok := hl.hashes[file]; ok {
    42  		return existing
    43  	}
    44  
    45  	h := sha256.New()
    46  	hl.hashes[file] = h
    47  	return h
    48  }
    49  
    50  // Encode takes the current sum of all the hashes and saves the hash list as a
    51  // SHA256SUMS-style text file.
    52  func (hl *hashList) Encode(w io.Writer) error {
    53  	for file, h := range hl.hashes {
    54  		if _, err := fmt.Fprintf(w, "%x  %s\n", h.Sum([]byte{}), file); err != nil {
    55  			return err
    56  		}
    57  	}
    58  	return nil
    59  }
    60  
    61  // DecodeAndVerify reads a SHA256SUMS-style text file and checks the results
    62  // against the current sums for all the hashes.
    63  func (hl *hashList) DecodeAndVerify(r io.Reader) error {
    64  	// Read the file and make sure everything in there has a matching hash.
    65  	seen := make(map[string]struct{})
    66  	s := bufio.NewScanner(r)
    67  	for s.Scan() {
    68  		sha := make([]byte, sha256.Size)
    69  		var file string
    70  		if _, err := fmt.Sscanf(s.Text(), "%x  %s", &sha, &file); err != nil {
    71  			return err
    72  		}
    73  
    74  		h, ok := hl.hashes[file]
    75  		if !ok {
    76  			return fmt.Errorf("list missing hash for %q", file)
    77  		}
    78  		if !bytes.Equal(sha, h.Sum([]byte{})) {
    79  			return fmt.Errorf("hash check failed for %q", file)
    80  		}
    81  		seen[file] = struct{}{}
    82  	}
    83  	if err := s.Err(); err != nil {
    84  		return err
    85  	}
    86  
    87  	// Make sure everything we had a hash for was seen.
    88  	for file := range hl.hashes {
    89  		if _, ok := seen[file]; !ok {
    90  			return fmt.Errorf("file missing for %q", file)
    91  		}
    92  	}
    93  
    94  	return nil
    95  }
    96  
    97  // write takes a writer and creates an archive with the snapshot metadata,
    98  // the snapshot itself, and adds some integrity checking information.
    99  func write(out io.Writer, metadata *raft.SnapshotMeta, snap io.Reader) error {
   100  	// Start a new tarball.
   101  	now := time.Now()
   102  	archive := tar.NewWriter(out)
   103  
   104  	// Create a hash list that we will use to write a SHA256SUMS file into
   105  	// the archive.
   106  	hl := newHashList()
   107  
   108  	// Encode the snapshot metadata, which we need to feed back during a
   109  	// restore.
   110  	metaHash := hl.Add("meta.json")
   111  	var metaBuffer bytes.Buffer
   112  	enc := json.NewEncoder(&metaBuffer)
   113  	if err := enc.Encode(metadata); err != nil {
   114  		return fmt.Errorf("failed to encode snapshot metadata: %v", err)
   115  	}
   116  	if err := archive.WriteHeader(&tar.Header{
   117  		Name:    "meta.json",
   118  		Mode:    0600,
   119  		Size:    int64(metaBuffer.Len()),
   120  		ModTime: now,
   121  	}); err != nil {
   122  		return fmt.Errorf("failed to write snapshot metadata header: %v", err)
   123  	}
   124  	if _, err := io.Copy(archive, io.TeeReader(&metaBuffer, metaHash)); err != nil {
   125  		return fmt.Errorf("failed to write snapshot metadata: %v", err)
   126  	}
   127  
   128  	// Copy the snapshot data given the size from the metadata.
   129  	snapHash := hl.Add("state.bin")
   130  	if err := archive.WriteHeader(&tar.Header{
   131  		Name:    "state.bin",
   132  		Mode:    0600,
   133  		Size:    metadata.Size,
   134  		ModTime: now,
   135  	}); err != nil {
   136  		return fmt.Errorf("failed to write snapshot data header: %v", err)
   137  	}
   138  	if _, err := io.CopyN(archive, io.TeeReader(snap, snapHash), metadata.Size); err != nil {
   139  		return fmt.Errorf("failed to write snapshot metadata: %v", err)
   140  	}
   141  
   142  	// Create a SHA256SUMS file that we can use to verify on restore.
   143  	var shaBuffer bytes.Buffer
   144  	if err := hl.Encode(&shaBuffer); err != nil {
   145  		return fmt.Errorf("failed to encode snapshot hashes: %v", err)
   146  	}
   147  	if err := archive.WriteHeader(&tar.Header{
   148  		Name:    "SHA256SUMS",
   149  		Mode:    0600,
   150  		Size:    int64(shaBuffer.Len()),
   151  		ModTime: now,
   152  	}); err != nil {
   153  		return fmt.Errorf("failed to write snapshot hashes header: %v", err)
   154  	}
   155  	if _, err := io.Copy(archive, &shaBuffer); err != nil {
   156  		return fmt.Errorf("failed to write snapshot metadata: %v", err)
   157  	}
   158  
   159  	// Finalize the archive.
   160  	if err := archive.Close(); err != nil {
   161  		return fmt.Errorf("failed to finalize snapshot: %v", err)
   162  	}
   163  
   164  	return nil
   165  }
   166  
   167  // read takes a reader and extracts the snapshot metadata and the snapshot
   168  // itself, and also checks the integrity of the data. You must arrange to call
   169  // Close() on the returned object or else you will leak a temporary file.
   170  func read(in io.Reader, metadata *raft.SnapshotMeta, snap io.Writer) error {
   171  	// Start a new tar reader.
   172  	archive := tar.NewReader(in)
   173  
   174  	// Create a hash list that we will use to compare with the SHA256SUMS
   175  	// file in the archive.
   176  	hl := newHashList()
   177  
   178  	// Populate the hashes for all the files we expect to see. The check at
   179  	// the end will make sure these are all present in the SHA256SUMS file
   180  	// and that the hashes match.
   181  	metaHash := hl.Add("meta.json")
   182  	snapHash := hl.Add("state.bin")
   183  
   184  	// Look through the archive for the pieces we care about.
   185  	var shaBuffer bytes.Buffer
   186  	for {
   187  		hdr, err := archive.Next()
   188  		if err == io.EOF {
   189  			break
   190  		}
   191  		if err != nil {
   192  			return fmt.Errorf("failed reading snapshot: %v", err)
   193  		}
   194  
   195  		switch hdr.Name {
   196  		case "meta.json":
   197  			// Previously we used json.Decode to decode the archive stream. There are
   198  			// edgecases in which it doesn't read all the bytes from the stream, even
   199  			// though the json object is still being parsed properly. Since we
   200  			// simutaniously feeded everything to metaHash, our hash ended up being
   201  			// different than what we calculated when creating the snapshot. Which in
   202  			// turn made the snapshot verification fail. By explicitly reading the
   203  			// whole thing first we ensure that we calculate the correct hash
   204  			// independent of how json.Decode works internally.
   205  			buf, err := ioutil.ReadAll(io.TeeReader(archive, metaHash))
   206  			if err != nil {
   207  				return fmt.Errorf("failed to read snapshot metadata: %v", err)
   208  			}
   209  			if err := json.Unmarshal(buf, &metadata); err != nil {
   210  				return fmt.Errorf("failed to decode snapshot metadata: %v", err)
   211  			}
   212  
   213  		case "state.bin":
   214  			if _, err := io.Copy(io.MultiWriter(snap, snapHash), archive); err != nil {
   215  				return fmt.Errorf("failed to read or write snapshot data: %v", err)
   216  			}
   217  
   218  		case "SHA256SUMS":
   219  			if _, err := io.Copy(&shaBuffer, archive); err != nil {
   220  				return fmt.Errorf("failed to read snapshot hashes: %v", err)
   221  			}
   222  
   223  		default:
   224  			return fmt.Errorf("unexpected file %q in snapshot", hdr.Name)
   225  		}
   226  
   227  	}
   228  
   229  	// Verify all the hashes.
   230  	if err := hl.DecodeAndVerify(&shaBuffer); err != nil {
   231  		return fmt.Errorf("failed checking integrity of snapshot: %v", err)
   232  	}
   233  
   234  	return nil
   235  }