github.com/rish1988/moby@v25.0.2+incompatible/pkg/tarsum/tarsum.go (about)

     1  // Package tarsum provides algorithms to perform checksum calculation on
     2  // filesystem layers.
     3  //
     4  // The transportation of filesystems, regarding Docker, is done with tar(1)
     5  // archives. There are a variety of tar serialization formats [2], and a key
     6  // concern here is ensuring a repeatable checksum given a set of inputs from a
     7  // generic tar archive. Types of transportation include distribution to and from a
     8  // registry endpoint, saving and loading through commands or Docker daemon APIs,
     9  // transferring the build context from client to Docker daemon, and committing the
    10  // filesystem of a container to become an image.
    11  //
    12  // As tar archives are used for transit, but not preserved in many situations, the
    13  // focus of the algorithm is to ensure the integrity of the preserved filesystem,
    14  // while maintaining a deterministic accountability. This includes neither
    15  // constraining the ordering or manipulation of the files during the creation or
    16  // unpacking of the archive, nor including additional metadata state about the file
    17  // system attributes.
    18  package tarsum // import "github.com/docker/docker/pkg/tarsum"
    19  
    20  import (
    21  	"archive/tar"
    22  	"bytes"
    23  	"compress/gzip"
    24  	"crypto"
    25  	"crypto/sha256"
    26  	"encoding/hex"
    27  	"errors"
    28  	"fmt"
    29  	"hash"
    30  	"io"
    31  	"path"
    32  	"strings"
    33  )
    34  
// Size classes for the scratch buffer that (*tarSum).Read allocates to hold
// data pulled from the tar reader. Read picks the smallest class that fits the
// caller's buffer; requests larger than buf32K get a buffer of exactly the
// requested length.
const (
	buf8K  = 8 * 1024
	buf16K = 16 * 1024
	buf32K = 32 * 1024
)
    40  
    41  // NewTarSum creates a new interface for calculating a fixed time checksum of a
    42  // tar archive.
    43  //
    44  // This is used for calculating checksums of layers of an image, in some cases
    45  // including the byte payload of the image's json metadata as well, and for
    46  // calculating the checksums for buildcache.
    47  func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
    48  	return NewTarSumHash(r, dc, v, DefaultTHash)
    49  }
    50  
    51  // NewTarSumHash creates a new TarSum, providing a THash to use rather than
    52  // the DefaultTHash.
    53  func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
    54  	headerSelector, err := getTarHeaderSelector(v)
    55  	if err != nil {
    56  		return nil, err
    57  	}
    58  	ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
    59  	err = ts.initTarSum()
    60  	return ts, err
    61  }
    62  
    63  // NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label.
    64  func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
    65  	versionName, hashName, ok := strings.Cut(label, "+")
    66  	if !ok {
    67  		return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")
    68  	}
    69  
    70  	version, ok := tarSumVersionsByName[versionName]
    71  	if !ok {
    72  		return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)
    73  	}
    74  
    75  	hashConfig, ok := standardHashConfigs[hashName]
    76  	if !ok {
    77  		return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)
    78  	}
    79  
    80  	tHash := NewTHash(hashConfig.name, hashConfig.hash.New)
    81  
    82  	return NewTarSumHash(r, disableCompression, version, tHash)
    83  }
    84  
// TarSum is the generic interface for calculating fixed time
// checksums of a tar archive.
//
// The notes on the individual methods describe the tarSum implementation in
// this file, which is what the NewTarSum* constructors return.
type TarSum interface {
	// Read re-emits the archive (gzip compressed unless compression was
	// disabled) while the per-entry checksums are being collected.
	io.Reader
	// GetSums returns the per-entry checksums collected so far.
	GetSums() FileInfoSums
	// Sum returns the checksum label "<version>+<hash name>:<hex digest>"
	// computed over the optional extra bytes followed by the collected
	// per-entry sums.
	Sum([]byte) string
	// Version returns the TarSum version the instance was created with.
	Version() Version
	// Hash returns the THash used for the calculation.
	Hash() THash
}
    94  
// tarSum struct is the structure for a Version0 checksum calculation.
//
// It reads entries from the wrapped tar stream, hashes each entry's selected
// header fields and contents, and re-serializes the archive through tarW and
// writer into bufWriter, which Read hands back to the caller.
type tarSum struct {
	// Reader is the source archive; initTarSum wraps it in tarR.
	io.Reader
	tarR               *tar.Reader       // tar reader over the embedded Reader
	tarW               *tar.Writer       // tar writer that re-serializes the archive into bufTar
	writer             writeCloseFlusher // gzip writer, or a pass-through when DisableCompression is set; writes to bufWriter
	bufTar             *bytes.Buffer     // staging buffer between tarW and writer
	bufWriter          *bytes.Buffer     // final output buffer drained by Read
	bufData            []byte            // scratch buffer for data read from tarR; grown on demand by Read
	h                  hash.Hash         // running hash of the current entry (selected header fields, then contents)
	tHash              THash             // source of hash.Hash instances and of the hash name used in Sum
	sums               FileInfoSums      // per-entry sums collected so far
	fileCounter        int64             // position assigned to the next entry appended to sums
	currentFile        string            // cleaned name of the entry currently being read
	finished           bool              // set once the source archive ended and the writers were closed
	first              bool              // true until the first end-of-entry is seen, i.e. before any header was read
	DisableCompression bool              // false by default. When false, the output is gzip compressed.
	tarSumVersion      Version           // this field is not exported so it can not be mutated during use
	headerSelector     tarHeaderSelector // handles selecting and ordering headers for files in the archive
}
   115  
   116  func (ts tarSum) Hash() THash {
   117  	return ts.tHash
   118  }
   119  
   120  func (ts tarSum) Version() Version {
   121  	return ts.tarSumVersion
   122  }
   123  
// THash provides a hash.Hash type generator and its name.
type THash interface {
	// Hash returns a hash.Hash to compute checksums with.
	Hash() hash.Hash
	// Name returns the name of the hash; tarSum.Sum embeds it in the
	// checksum label after the "+".
	Name() string
}
   129  
   130  // NewTHash is a convenience method for creating a THash.
   131  func NewTHash(name string, h func() hash.Hash) THash {
   132  	return simpleTHash{n: name, h: h}
   133  }
   134  
// tHashConfig pairs a hash name with its crypto.Hash identifier.
// NewTarSumForLabel turns one of these into a THash via NewTHash.
type tHashConfig struct {
	name string      // name passed to NewTHash
	hash crypto.Hash // identifier whose New method supplies the hash.Hash constructor
}
   139  
// standardHashConfigs maps the hash-name part of a TarSum label (the text
// after the "+") to its configuration. NewTarSumForLabel rejects any name that
// is not a key of this map.
//
// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
var standardHashConfigs = map[string]tHashConfig{
	"sha256": {name: "sha256", hash: crypto.SHA256},
	"sha512": {name: "sha512", hash: crypto.SHA512},
}
   145  
// DefaultTHash is the default TarSum hashing algorithm - "sha256". NewTarSum
// uses it, and initTarSum falls back to it when no THash was supplied.
var DefaultTHash = NewTHash("sha256", sha256.New)
   148  
   149  type simpleTHash struct {
   150  	n string
   151  	h func() hash.Hash
   152  }
   153  
   154  func (sth simpleTHash) Name() string    { return sth.n }
   155  func (sth simpleTHash) Hash() hash.Hash { return sth.h() }
   156  
   157  func (ts *tarSum) encodeHeader(h *tar.Header) error {
   158  	for _, elem := range ts.headerSelector.selectHeaders(h) {
   159  		// Ignore these headers to be compatible with versions
   160  		// before go 1.10
   161  		if elem[0] == "gname" || elem[0] == "uname" {
   162  			elem[1] = ""
   163  		}
   164  		if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
   165  			return err
   166  		}
   167  	}
   168  	return nil
   169  }
   170  
   171  func (ts *tarSum) initTarSum() error {
   172  	ts.bufTar = bytes.NewBuffer([]byte{})
   173  	ts.bufWriter = bytes.NewBuffer([]byte{})
   174  	ts.tarR = tar.NewReader(ts.Reader)
   175  	ts.tarW = tar.NewWriter(ts.bufTar)
   176  	if !ts.DisableCompression {
   177  		ts.writer = gzip.NewWriter(ts.bufWriter)
   178  	} else {
   179  		ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
   180  	}
   181  	if ts.tHash == nil {
   182  		ts.tHash = DefaultTHash
   183  	}
   184  	ts.h = ts.tHash.Hash()
   185  	ts.h.Reset()
   186  	ts.first = true
   187  	ts.sums = FileInfoSums{}
   188  	return nil
   189  }
   190  
   191  func (ts *tarSum) Read(buf []byte) (int, error) {
   192  	if ts.finished {
   193  		return ts.bufWriter.Read(buf)
   194  	}
   195  	if len(ts.bufData) < len(buf) {
   196  		switch {
   197  		case len(buf) <= buf8K:
   198  			ts.bufData = make([]byte, buf8K)
   199  		case len(buf) <= buf16K:
   200  			ts.bufData = make([]byte, buf16K)
   201  		case len(buf) <= buf32K:
   202  			ts.bufData = make([]byte, buf32K)
   203  		default:
   204  			ts.bufData = make([]byte, len(buf))
   205  		}
   206  	}
   207  	buf2 := ts.bufData[:len(buf)]
   208  
   209  	n, err := ts.tarR.Read(buf2)
   210  	if err != nil {
   211  		if err == io.EOF {
   212  			if _, err := ts.h.Write(buf2[:n]); err != nil {
   213  				return 0, err
   214  			}
   215  			if !ts.first {
   216  				ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
   217  				ts.fileCounter++
   218  				ts.h.Reset()
   219  			} else {
   220  				ts.first = false
   221  			}
   222  
   223  			if _, err := ts.tarW.Write(buf2[:n]); err != nil {
   224  				return 0, err
   225  			}
   226  
   227  			currentHeader, err := ts.tarR.Next()
   228  			if err != nil {
   229  				if err == io.EOF {
   230  					if err := ts.tarW.Close(); err != nil {
   231  						return 0, err
   232  					}
   233  					if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   234  						return 0, err
   235  					}
   236  					if err := ts.writer.Close(); err != nil {
   237  						return 0, err
   238  					}
   239  					ts.finished = true
   240  					return ts.bufWriter.Read(buf)
   241  				}
   242  				return 0, err
   243  			}
   244  
   245  			//#nosec G305 -- The joined path is not passed to any filesystem APIs.
   246  			ts.currentFile = path.Join(".", path.Join("/", currentHeader.Name))
   247  			if err := ts.encodeHeader(currentHeader); err != nil {
   248  				return 0, err
   249  			}
   250  			if err := ts.tarW.WriteHeader(currentHeader); err != nil {
   251  				return 0, err
   252  			}
   253  
   254  			if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   255  				return 0, err
   256  			}
   257  			ts.writer.Flush()
   258  
   259  			return ts.bufWriter.Read(buf)
   260  		}
   261  		return 0, err
   262  	}
   263  
   264  	// Filling the hash buffer
   265  	if _, err = ts.h.Write(buf2[:n]); err != nil {
   266  		return 0, err
   267  	}
   268  
   269  	// Filling the tar writer
   270  	if _, err = ts.tarW.Write(buf2[:n]); err != nil {
   271  		return 0, err
   272  	}
   273  
   274  	// Filling the output writer
   275  	if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
   276  		return 0, err
   277  	}
   278  	ts.writer.Flush()
   279  
   280  	return ts.bufWriter.Read(buf)
   281  }
   282  
   283  func (ts *tarSum) Sum(extra []byte) string {
   284  	ts.sums.SortBySums()
   285  	h := ts.tHash.Hash()
   286  	if extra != nil {
   287  		h.Write(extra)
   288  	}
   289  	for _, fis := range ts.sums {
   290  		h.Write([]byte(fis.Sum()))
   291  	}
   292  	checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
   293  	return checksum
   294  }
   295  
   296  func (ts *tarSum) GetSums() FileInfoSums {
   297  	return ts.sums
   298  }