github.com/endophage/docker@v1.4.2-0.20161027011718-242853499895/pkg/tarsum/tarsum.go (about)

     1  // Package tarsum provides algorithms to perform checksum calculation on
     2  // filesystem layers.
     3  //
     4  // The transportation of filesystems, regarding Docker, is done with tar(1)
     5  // archives. There are a variety of tar serialization formats [2], and a key
     6  // concern here is ensuring a repeatable checksum given a set of inputs from a
     7  // generic tar archive. Types of transportation include distribution to and from a
     8  // registry endpoint, saving and loading through commands or Docker daemon APIs,
     9  // transferring the build context from client to Docker daemon, and committing the
    10  // filesystem of a container to become an image.
    11  //
    12  // As tar archives are used for transit, but not preserved in many situations, the
    13  // focus of the algorithm is to ensure the integrity of the preserved filesystem,
// while maintaining a deterministic accountability. This includes neither
// constraining the ordering or manipulation of the files during the creation or
// unpacking of the archive, nor including additional metadata state about the
// file system attributes.
    18  package tarsum
    19  
    20  import (
    21  	"archive/tar"
    22  	"bytes"
    23  	"compress/gzip"
    24  	"crypto"
    25  	"crypto/sha256"
    26  	"encoding/hex"
    27  	"errors"
    28  	"fmt"
    29  	"hash"
    30  	"io"
    31  	"path"
    32  	"strings"
    33  )
    34  
// Sizes that (*tarSum).Read chooses from when it has to (re)allocate its
// scratch read buffer; requests larger than buf32K get an exact-size buffer.
const (
	buf8K  = 8 * 1024
	buf16K = 16 * 1024
	buf32K = 32 * 1024
)
    40  
// NewTarSum creates a new interface for calculating a fixed time checksum of a
// tar archive.
//
// This is used for calculating checksums of layers of an image, in some cases
// including the byte payload of the image's json metadata as well, and for
// calculating the checksums for buildcache.
//
// r is the tar stream to checksum, dc disables gzip compression of the
// re-encoded output when true, and v selects the TarSum version. The hash
// used is DefaultTHash; use NewTarSumHash to supply a different one.
func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
	return NewTarSumHash(r, dc, v, DefaultTHash)
}
    50  
    51  // NewTarSumHash creates a new TarSum, providing a THash to use rather than
    52  // the DefaultTHash.
    53  func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
    54  	headerSelector, err := getTarHeaderSelector(v)
    55  	if err != nil {
    56  		return nil, err
    57  	}
    58  	ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
    59  	err = ts.initTarSum()
    60  	return ts, err
    61  }
    62  
    63  // NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label.
    64  func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
    65  	parts := strings.SplitN(label, "+", 2)
    66  	if len(parts) != 2 {
    67  		return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")
    68  	}
    69  
    70  	versionName, hashName := parts[0], parts[1]
    71  
    72  	version, ok := tarSumVersionsByName[versionName]
    73  	if !ok {
    74  		return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)
    75  	}
    76  
    77  	hashConfig, ok := standardHashConfigs[hashName]
    78  	if !ok {
    79  		return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)
    80  	}
    81  
    82  	tHash := NewTHash(hashConfig.name, hashConfig.hash.New)
    83  
    84  	return NewTarSumHash(r, disableCompression, version, tHash)
    85  }
    86  
// TarSum is the generic interface for calculating fixed time
// checksums of a tar archive.
type TarSum interface {
	// Reader yields the re-encoded archive. In the implementation in this
	// file, per-entry checksums are only accumulated as the stream is read.
	io.Reader
	// GetSums returns the per-entry checksums recorded so far.
	GetSums() FileInfoSums
	// Sum returns the overall checksum string; the given bytes, when
	// non-nil, are hashed ahead of the per-entry sums.
	Sum([]byte) string
	// Version reports the TarSum version in use.
	Version() Version
	// Hash reports the THash in use.
	Hash() THash
}
    96  
// tarSum struct is the structure for a Version0 checksum calculation.
type tarSum struct {
	// Reader is the raw tar stream being checksummed; tarR is built on top of it.
	io.Reader
	tarR               *tar.Reader       // parses entries out of Reader
	tarW               *tar.Writer       // re-encodes entries into bufTar
	writer             writeCloseFlusher // gzip or pass-through writer feeding bufWriter
	bufTar             *bytes.Buffer     // staging buffer for tarW output
	bufWriter          *bytes.Buffer     // final output served to callers of Read
	bufData            []byte            // scratch buffer reused across Read calls
	h                  hash.Hash         // running hash of the current entry
	tHash              THash             // creates h and the hash used by Sum
	sums               FileInfoSums      // per-entry checksums collected so far
	fileCounter        int64             // position given to the next recorded entry
	currentFile        string            // cleaned name of the entry being read
	finished           bool              // set once the input archive is exhausted
	first              bool              // true until the first header has been requested
	DisableCompression bool              // false by default. When false, the output is gzip compressed.
	tarSumVersion      Version           // this field is not exported so it can not be mutated during use
	headerSelector     tarHeaderSelector // handles selecting and ordering headers for files in the archive
}
   117  
   118  func (ts tarSum) Hash() THash {
   119  	return ts.tHash
   120  }
   121  
   122  func (ts tarSum) Version() Version {
   123  	return ts.tarSumVersion
   124  }
   125  
// THash provides a hash.Hash type generator and its name.
type THash interface {
	// Hash returns a hash.Hash to feed data into.
	Hash() hash.Hash
	// Name returns the hash's name; tarSum.Sum embeds it in the checksum label.
	Name() string
}
   131  
   132  // NewTHash is a convenience method for creating a THash.
   133  func NewTHash(name string, h func() hash.Hash) THash {
   134  	return simpleTHash{n: name, h: h}
   135  }
   136  
// tHashConfig pairs a hash label name with the crypto.Hash that implements it.
type tHashConfig struct {
	name string      // name as it appears in a TarSum label
	hash crypto.Hash // identifier whose New method builds the hash
}
   141  
var (
	// standardHashConfigs maps the hash names accepted by NewTarSumForLabel
	// to their configuration.
	//
	// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
	//
	// NOTE(review): this file imports crypto/sha256 only; crypto.SHA512 is
	// usable only if crypto/sha512 is linked in from elsewhere — confirm.
	standardHashConfigs = map[string]tHashConfig{
		"sha256": {name: "sha256", hash: crypto.SHA256},
		"sha512": {name: "sha512", hash: crypto.SHA512},
	}
)
   149  
// DefaultTHash is the default TarSum hashing algorithm - "sha256". It is what
// NewTarSum passes to NewTarSumHash and what initTarSum falls back to when no
// THash was supplied.
var DefaultTHash = NewTHash("sha256", sha256.New)
   152  
   153  type simpleTHash struct {
   154  	n string
   155  	h func() hash.Hash
   156  }
   157  
   158  func (sth simpleTHash) Name() string    { return sth.n }
   159  func (sth simpleTHash) Hash() hash.Hash { return sth.h() }
   160  
   161  func (ts *tarSum) encodeHeader(h *tar.Header) error {
   162  	for _, elem := range ts.headerSelector.selectHeaders(h) {
   163  		if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
   164  			return err
   165  		}
   166  	}
   167  	return nil
   168  }
   169  
   170  func (ts *tarSum) initTarSum() error {
   171  	ts.bufTar = bytes.NewBuffer([]byte{})
   172  	ts.bufWriter = bytes.NewBuffer([]byte{})
   173  	ts.tarR = tar.NewReader(ts.Reader)
   174  	ts.tarW = tar.NewWriter(ts.bufTar)
   175  	if !ts.DisableCompression {
   176  		ts.writer = gzip.NewWriter(ts.bufWriter)
   177  	} else {
   178  		ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
   179  	}
   180  	if ts.tHash == nil {
   181  		ts.tHash = DefaultTHash
   182  	}
   183  	ts.h = ts.tHash.Hash()
   184  	ts.h.Reset()
   185  	ts.first = true
   186  	ts.sums = FileInfoSums{}
   187  	return nil
   188  }
   189  
   190  func (ts *tarSum) Read(buf []byte) (int, error) {
   191  	if ts.finished {
   192  		return ts.bufWriter.Read(buf)
   193  	}
   194  	if len(ts.bufData) < len(buf) {
   195  		switch {
   196  		case len(buf) <= buf8K:
   197  			ts.bufData = make([]byte, buf8K)
   198  		case len(buf) <= buf16K:
   199  			ts.bufData = make([]byte, buf16K)
   200  		case len(buf) <= buf32K:
   201  			ts.bufData = make([]byte, buf32K)
   202  		default:
   203  			ts.bufData = make([]byte, len(buf))
   204  		}
   205  	}
   206  	buf2 := ts.bufData[:len(buf)]
   207  
   208  	n, err := ts.tarR.Read(buf2)
   209  	if err != nil {
   210  		if err == io.EOF {
   211  			if _, err := ts.h.Write(buf2[:n]); err != nil {
   212  				return 0, err
   213  			}
   214  			if !ts.first {
   215  				ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
   216  				ts.fileCounter++
   217  				ts.h.Reset()
   218  			} else {
   219  				ts.first = false
   220  			}
   221  
   222  			currentHeader, err := ts.tarR.Next()
   223  			if err != nil {
   224  				if err == io.EOF {
   225  					if err := ts.tarW.Close(); err != nil {
   226  						return 0, err
   227  					}
   228  					if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   229  						return 0, err
   230  					}
   231  					if err := ts.writer.Close(); err != nil {
   232  						return 0, err
   233  					}
   234  					ts.finished = true
   235  					return n, nil
   236  				}
   237  				return n, err
   238  			}
   239  			ts.currentFile = path.Clean(currentHeader.Name)
   240  			if err := ts.encodeHeader(currentHeader); err != nil {
   241  				return 0, err
   242  			}
   243  			if err := ts.tarW.WriteHeader(currentHeader); err != nil {
   244  				return 0, err
   245  			}
   246  			if _, err := ts.tarW.Write(buf2[:n]); err != nil {
   247  				return 0, err
   248  			}
   249  			ts.tarW.Flush()
   250  			if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   251  				return 0, err
   252  			}
   253  			ts.writer.Flush()
   254  
   255  			return ts.bufWriter.Read(buf)
   256  		}
   257  		return n, err
   258  	}
   259  
   260  	// Filling the hash buffer
   261  	if _, err = ts.h.Write(buf2[:n]); err != nil {
   262  		return 0, err
   263  	}
   264  
   265  	// Filling the tar writer
   266  	if _, err = ts.tarW.Write(buf2[:n]); err != nil {
   267  		return 0, err
   268  	}
   269  	ts.tarW.Flush()
   270  
   271  	// Filling the output writer
   272  	if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
   273  		return 0, err
   274  	}
   275  	ts.writer.Flush()
   276  
   277  	return ts.bufWriter.Read(buf)
   278  }
   279  
   280  func (ts *tarSum) Sum(extra []byte) string {
   281  	ts.sums.SortBySums()
   282  	h := ts.tHash.Hash()
   283  	if extra != nil {
   284  		h.Write(extra)
   285  	}
   286  	for _, fis := range ts.sums {
   287  		h.Write([]byte(fis.Sum()))
   288  	}
   289  	checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
   290  	return checksum
   291  }
   292  
// GetSums returns the per-entry checksums recorded so far. The value returned
// is ts.sums itself, not a copy, and Sum reorders it via SortBySums.
func (ts *tarSum) GetSums() FileInfoSums {
	return ts.sums
}