github.com/demonoid81/moby@v0.0.0-20200517203328-62dd8e17c460/pkg/tarsum/tarsum.go (about)

     1  // Package tarsum provides algorithms to perform checksum calculation on
     2  // filesystem layers.
     3  //
     4  // The transportation of filesystems, regarding Docker, is done with tar(1)
     5  // archives. There are a variety of tar serialization formats [2], and a key
     6  // concern here is ensuring a repeatable checksum given a set of inputs from a
     7  // generic tar archive. Types of transportation include distribution to and from a
     8  // registry endpoint, saving and loading through commands or Docker daemon APIs,
     9  // transferring the build context from client to Docker daemon, and committing the
    10  // filesystem of a container to become an image.
    11  //
    12  // As tar archives are used for transit, but not preserved in many situations, the
    13  // focus of the algorithm is to ensure the integrity of the preserved filesystem,
// while maintaining a deterministic accountability. This includes neither
// constraining the ordering or manipulation of the files during the creation or
// unpacking of the archive, nor including additional metadata state about the
// file system attributes.
    18  package tarsum // import "github.com/demonoid81/moby/pkg/tarsum"
    19  
    20  import (
    21  	"archive/tar"
    22  	"bytes"
    23  	"compress/gzip"
    24  	"crypto"
    25  	"crypto/sha256"
    26  	"encoding/hex"
    27  	"errors"
    28  	"fmt"
    29  	"hash"
    30  	"io"
    31  	"path"
    32  	"strings"
    33  )
    34  
    35  const (
    36  	buf8K  = 8 * 1024
    37  	buf16K = 16 * 1024
    38  	buf32K = 32 * 1024
    39  )
    40  
// NewTarSum creates a new interface for calculating a fixed time checksum of a
// tar archive.
//
// This is used for calculating checksums of layers of an image, in some cases
// including the byte payload of the image's json metadata as well, and for
// calculating the checksums for buildcache.
//
// r is the tar stream to read, dc is stored as the DisableCompression setting
// of the result, and v selects the tarsum version. The checksum is computed
// with DefaultTHash; call NewTarSumHash directly to supply a different THash.
// Any error reported by NewTarSumHash is returned unchanged.
func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
	return NewTarSumHash(r, dc, v, DefaultTHash)
}
    50  
    51  // NewTarSumHash creates a new TarSum, providing a THash to use rather than
    52  // the DefaultTHash.
    53  func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
    54  	headerSelector, err := getTarHeaderSelector(v)
    55  	if err != nil {
    56  		return nil, err
    57  	}
    58  	ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
    59  	err = ts.initTarSum()
    60  	return ts, err
    61  }
    62  
    63  // NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label.
    64  func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
    65  	parts := strings.SplitN(label, "+", 2)
    66  	if len(parts) != 2 {
    67  		return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")
    68  	}
    69  
    70  	versionName, hashName := parts[0], parts[1]
    71  
    72  	version, ok := tarSumVersionsByName[versionName]
    73  	if !ok {
    74  		return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)
    75  	}
    76  
    77  	hashConfig, ok := standardHashConfigs[hashName]
    78  	if !ok {
    79  		return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)
    80  	}
    81  
    82  	tHash := NewTHash(hashConfig.name, hashConfig.hash.New)
    83  
    84  	return NewTarSumHash(r, disableCompression, version, tHash)
    85  }
    86  
// TarSum is the generic interface for calculating fixed time
// checksums of a tar archive.
type TarSum interface {
	// Reader yields the processed archive stream. The implementation in this
	// file builds up its per-file checksums as a side effect of being read.
	io.Reader
	// GetSums returns the per-file checksums recorded so far.
	GetSums() FileInfoSums
	// Sum returns the overall checksum string. The implementation in this
	// file formats it as "{version}+{hash name}:{hex digest}" and, when the
	// argument is non-nil, hashes it ahead of the per-file sums.
	Sum([]byte) string
	// Version returns the tarsum version this TarSum was created with.
	Version() Version
	// Hash returns the THash used to compute the checksums.
	Hash() THash
}
    96  
// tarSum is the TarSum implementation built by NewTarSumHash. It reads a tar
// stream entry by entry, hashes each entry's selected header fields and
// payload, and re-serialises the archive into an output buffer that Read
// hands back to the caller. The tarsum version in use is carried in
// tarSumVersion, with the matching header selection in headerSelector.
type tarSum struct {
	// Reader is the source tar stream; initTarSum wraps it in tarR. The
	// promoted Read method is shadowed by (*tarSum).Read.
	io.Reader
	// tarR parses the source stream; tarW re-encodes headers and payload
	// into bufTar.
	tarR               *tar.Reader
	tarW               *tar.Writer
	// writer is the output stage (gzip or pass-through, depending on
	// DisableCompression) that writes into bufWriter.
	writer             writeCloseFlusher
	// bufTar holds re-encoded tar bytes awaiting the output stage; bufWriter
	// holds finished output bytes awaiting the caller of Read.
	bufTar             *bytes.Buffer
	bufWriter          *bytes.Buffer
	// bufData is a scratch buffer reused across Read calls.
	bufData            []byte
	// h accumulates the hash of the entry currently being read; tHash
	// creates h and the hash used by Sum.
	h                  hash.Hash
	tHash              THash
	// sums collects one checksum per completed entry.
	sums               FileInfoSums
	// fileCounter is the position assigned to the next recorded checksum.
	fileCounter        int64
	// currentFile is the cleaned name of the entry being hashed.
	currentFile        string
	// finished is set once the source archive is exhausted and both writers
	// have been closed; afterwards Read only drains bufWriter.
	finished           bool
	// first is true until Read has advanced to the first header, so that no
	// checksum is recorded before any entry has been read.
	first              bool
	DisableCompression bool              // false by default. When false, the output is gzip compressed.
	tarSumVersion      Version           // this field is not exported so it can not be mutated during use
	headerSelector     tarHeaderSelector // handles selecting and ordering headers for files in the archive
}
   117  
   118  func (ts tarSum) Hash() THash {
   119  	return ts.tHash
   120  }
   121  
   122  func (ts tarSum) Version() Version {
   123  	return ts.tarSumVersion
   124  }
   125  
// THash provides a hash.Hash type generator and its name.
type THash interface {
	// Hash returns a hash.Hash to compute a checksum with. tarSum calls it
	// once during initialisation and once per call to Sum.
	Hash() hash.Hash
	// Name returns the hash's name, which tarSum embeds in the string
	// produced by Sum.
	Name() string
}
   131  
// NewTHash is a convenience function for creating a THash.
//
// name is what the result's Name method reports, and h is the constructor
// that its Hash method invokes on every call.
func NewTHash(name string, h func() hash.Hash) THash {
	return simpleTHash{n: name, h: h}
}
   136  
// tHashConfig pairs a hash name, as it appears in a tarsum label, with the
// crypto.Hash identifier used to construct that hash.
type tHashConfig struct {
	name string
	hash crypto.Hash
}
   141  
var (
	// standardHashConfigs maps the hash-name part of a tarsum label to its
	// configuration; NewTarSumForLabel resolves hash names through it.
	//
	// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
	//
	// NOTE(review): this file imports crypto/sha256 only, so the "sha512"
	// entry appears to rely on crypto/sha512 being linked in by some other
	// package in the final binary — confirm.
	standardHashConfigs = map[string]tHashConfig{
		"sha256": {name: "sha256", hash: crypto.SHA256},
		"sha512": {name: "sha512", hash: crypto.SHA512},
	}
)
   149  
// DefaultTHash is the default TarSum hashing algorithm - "sha256". It is used
// by NewTarSum and whenever a tarSum is initialised with a nil THash.
var DefaultTHash = NewTHash("sha256", sha256.New)
   152  
   153  type simpleTHash struct {
   154  	n string
   155  	h func() hash.Hash
   156  }
   157  
   158  func (sth simpleTHash) Name() string    { return sth.n }
   159  func (sth simpleTHash) Hash() hash.Hash { return sth.h() }
   160  
   161  func (ts *tarSum) encodeHeader(h *tar.Header) error {
   162  	for _, elem := range ts.headerSelector.selectHeaders(h) {
   163  		// Ignore these headers to be compatible with versions
   164  		// before go 1.10
   165  		if elem[0] == "gname" || elem[0] == "uname" {
   166  			elem[1] = ""
   167  		}
   168  		if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
   169  			return err
   170  		}
   171  	}
   172  	return nil
   173  }
   174  
   175  func (ts *tarSum) initTarSum() error {
   176  	ts.bufTar = bytes.NewBuffer([]byte{})
   177  	ts.bufWriter = bytes.NewBuffer([]byte{})
   178  	ts.tarR = tar.NewReader(ts.Reader)
   179  	ts.tarW = tar.NewWriter(ts.bufTar)
   180  	if !ts.DisableCompression {
   181  		ts.writer = gzip.NewWriter(ts.bufWriter)
   182  	} else {
   183  		ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
   184  	}
   185  	if ts.tHash == nil {
   186  		ts.tHash = DefaultTHash
   187  	}
   188  	ts.h = ts.tHash.Hash()
   189  	ts.h.Reset()
   190  	ts.first = true
   191  	ts.sums = FileInfoSums{}
   192  	return nil
   193  }
   194  
   195  func (ts *tarSum) Read(buf []byte) (int, error) {
   196  	if ts.finished {
   197  		return ts.bufWriter.Read(buf)
   198  	}
   199  	if len(ts.bufData) < len(buf) {
   200  		switch {
   201  		case len(buf) <= buf8K:
   202  			ts.bufData = make([]byte, buf8K)
   203  		case len(buf) <= buf16K:
   204  			ts.bufData = make([]byte, buf16K)
   205  		case len(buf) <= buf32K:
   206  			ts.bufData = make([]byte, buf32K)
   207  		default:
   208  			ts.bufData = make([]byte, len(buf))
   209  		}
   210  	}
   211  	buf2 := ts.bufData[:len(buf)]
   212  
   213  	n, err := ts.tarR.Read(buf2)
   214  	if err != nil {
   215  		if err == io.EOF {
   216  			if _, err := ts.h.Write(buf2[:n]); err != nil {
   217  				return 0, err
   218  			}
   219  			if !ts.first {
   220  				ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
   221  				ts.fileCounter++
   222  				ts.h.Reset()
   223  			} else {
   224  				ts.first = false
   225  			}
   226  
   227  			if _, err := ts.tarW.Write(buf2[:n]); err != nil {
   228  				return 0, err
   229  			}
   230  
   231  			currentHeader, err := ts.tarR.Next()
   232  			if err != nil {
   233  				if err == io.EOF {
   234  					if err := ts.tarW.Close(); err != nil {
   235  						return 0, err
   236  					}
   237  					if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   238  						return 0, err
   239  					}
   240  					if err := ts.writer.Close(); err != nil {
   241  						return 0, err
   242  					}
   243  					ts.finished = true
   244  					return ts.bufWriter.Read(buf)
   245  				}
   246  				return 0, err
   247  			}
   248  
   249  			ts.currentFile = path.Join(".", path.Join("/", currentHeader.Name))
   250  			if err := ts.encodeHeader(currentHeader); err != nil {
   251  				return 0, err
   252  			}
   253  			if err := ts.tarW.WriteHeader(currentHeader); err != nil {
   254  				return 0, err
   255  			}
   256  
   257  			if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   258  				return 0, err
   259  			}
   260  			ts.writer.Flush()
   261  
   262  			return ts.bufWriter.Read(buf)
   263  		}
   264  		return 0, err
   265  	}
   266  
   267  	// Filling the hash buffer
   268  	if _, err = ts.h.Write(buf2[:n]); err != nil {
   269  		return 0, err
   270  	}
   271  
   272  	// Filling the tar writer
   273  	if _, err = ts.tarW.Write(buf2[:n]); err != nil {
   274  		return 0, err
   275  	}
   276  
   277  	// Filling the output writer
   278  	if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
   279  		return 0, err
   280  	}
   281  	ts.writer.Flush()
   282  
   283  	return ts.bufWriter.Read(buf)
   284  }
   285  
   286  func (ts *tarSum) Sum(extra []byte) string {
   287  	ts.sums.SortBySums()
   288  	h := ts.tHash.Hash()
   289  	if extra != nil {
   290  		h.Write(extra)
   291  	}
   292  	for _, fis := range ts.sums {
   293  		h.Write([]byte(fis.Sum()))
   294  	}
   295  	checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
   296  	return checksum
   297  }
   298  
// GetSums returns the per-file checksums recorded so far. The returned value
// is the tarSum's own collection, not a copy.
func (ts *tarSum) GetSums() FileInfoSums {
	return ts.sums
}