github.com/sijibomii/docker@v0.0.0-20231230191044-5cf6ca554647/pkg/tarsum/tarsum.go (about)

     1  // Package tarsum provides algorithms to perform checksum calculation on
     2  // filesystem layers.
     3  //
     4  // The transportation of filesystems, regarding Docker, is done with tar(1)
     5  // archives. There are a variety of tar serialization formats [2], and a key
     6  // concern here is ensuring a repeatable checksum given a set of inputs from a
     7  // generic tar archive. Types of transportation include distribution to and from a
     8  // registry endpoint, saving and loading through commands or Docker daemon APIs,
     9  // transferring the build context from client to Docker daemon, and committing the
    10  // filesystem of a container to become an image.
    11  //
    12  // As tar archives are used for transit, but not preserved in many situations, the
    13  // focus of the algorithm is to ensure the integrity of the preserved filesystem,
// while maintaining a deterministic accountability. This includes neither
// constraining the ordering or manipulation of the files during the creation or
// unpacking of the archive, nor including additional metadata state about the
// file system attributes.
    18  package tarsum
    19  
    20  import (
    21  	"archive/tar"
    22  	"bytes"
    23  	"compress/gzip"
    24  	"crypto"
    25  	"crypto/sha256"
    26  	"encoding/hex"
    27  	"errors"
    28  	"fmt"
    29  	"hash"
    30  	"io"
    31  	"strings"
    32  )
    33  
// Size tiers for the scratch buffer that (*tarSum).Read allocates to hold data
// read from the source tar stream. Read picks the smallest tier that fits the
// caller's buffer and falls back to the caller's exact length above 32 KiB.
const (
	buf8K  = 8 * 1024
	buf16K = 16 * 1024
	buf32K = 32 * 1024
)
    39  
// NewTarSum creates a new interface for calculating a fixed time checksum of a
// tar archive.
//
// This is used for calculating checksums of layers of an image, in some cases
// including the byte payload of the image's json metadata as well, and for
// calculating the checksums for buildcache.
//
// r is the tar stream to checksum, dc disables gzip compression of the
// re-emitted stream when true, and v selects the TarSum version. The hash used
// is DefaultTHash; call NewTarSumHash to supply a different one.
func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
	return NewTarSumHash(r, dc, v, DefaultTHash)
}
    49  
    50  // NewTarSumHash creates a new TarSum, providing a THash to use rather than
    51  // the DefaultTHash.
    52  func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
    53  	headerSelector, err := getTarHeaderSelector(v)
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  	ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
    58  	err = ts.initTarSum()
    59  	return ts, err
    60  }
    61  
    62  // NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label.
    63  func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
    64  	parts := strings.SplitN(label, "+", 2)
    65  	if len(parts) != 2 {
    66  		return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")
    67  	}
    68  
    69  	versionName, hashName := parts[0], parts[1]
    70  
    71  	version, ok := tarSumVersionsByName[versionName]
    72  	if !ok {
    73  		return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)
    74  	}
    75  
    76  	hashConfig, ok := standardHashConfigs[hashName]
    77  	if !ok {
    78  		return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)
    79  	}
    80  
    81  	tHash := NewTHash(hashConfig.name, hashConfig.hash.New)
    82  
    83  	return NewTarSumHash(r, disableCompression, version, tHash)
    84  }
    85  
// TarSum is the generic interface for calculating fixed time
// checksums of a tar archive.
//
// The checksum is built up as the archive is consumed through Read, so Sum and
// GetSums only reflect the entries that have been read so far.
type TarSum interface {
	// Reader yields the re-encoded archive while the checksum is accumulated.
	io.Reader
	// GetSums returns the per-entry checksums collected so far.
	GetSums() FileInfoSums
	// Sum returns the overall checksum string; the given bytes, if any, are
	// hashed ahead of the per-entry checksums.
	Sum([]byte) string
	// Version reports the TarSum version in use.
	Version() Version
	// Hash reports the THash in use.
	Hash() THash
}
    95  
// tarSum struct is the structure for a Version0 checksum calculation.
//
// Data flows from the embedded Reader through tarR, is hashed into h, written
// back out through tarW into bufTar, copied through writer into bufWriter, and
// finally handed to the caller of Read from bufWriter.
type tarSum struct {
	// Source tar stream. The Read method declared on *tarSum takes precedence
	// over the promoted Reader.Read.
	io.Reader
	tarR               *tar.Reader       // parses the source stream
	tarW               *tar.Writer       // re-encodes entries into bufTar
	writer             writeCloseFlusher // gzip or pass-through writer targeting bufWriter
	bufTar             *bytes.Buffer     // staging buffer for tarW output
	bufWriter          *bytes.Buffer     // final output buffer drained by Read
	bufData            []byte            // scratch buffer for data read from tarR
	h                  hash.Hash         // running hash of the current entry (selected headers + data)
	tHash              THash             // factory for h and for the hash used by Sum
	sums               FileInfoSums      // per-entry checksums collected so far
	fileCounter        int64             // position recorded with the next entry checksum
	currentFile        string            // name of the entry being hashed, without leading "./" or trailing "/"
	finished           bool              // set once the source is exhausted and the writers are closed
	first              bool              // true until the first header has been read
	DisableCompression bool              // false by default. When false, the output is gzip compressed.
	tarSumVersion      Version           // this field is not exported so it can not be mutated during use
	headerSelector     tarHeaderSelector // handles selecting and ordering headers for files in the archive
}
   116  
   117  func (ts tarSum) Hash() THash {
   118  	return ts.tHash
   119  }
   120  
   121  func (ts tarSum) Version() Version {
   122  	return ts.tarSumVersion
   123  }
   124  
// THash provides a hash.Hash type generator and its name.
type THash interface {
	// Hash returns a hash.Hash to accumulate a checksum with.
	Hash() hash.Hash
	// Name returns the algorithm name; it is embedded in the string produced
	// by TarSum.Sum.
	Name() string
}
   130  
// NewTHash is a convenience method for creating a THash.
//
// name is what the result's Name method reports, and h is the constructor its
// Hash method invokes on every call. h is not validated here; a nil h will
// panic when Hash is called.
func NewTHash(name string, h func() hash.Hash) THash {
	return simpleTHash{n: name, h: h}
}
   135  
// tHashConfig pairs a hash name, as it appears in a TarSum label, with the
// crypto.Hash identifier used to construct that hash.
type tHashConfig struct {
	name string      // label name, e.g. "sha256"
	hash crypto.Hash // standard-library identifier whose New method builds the hash
}
   140  
var (
	// standardHashConfigs maps the hash names accepted in a TarSum label to
	// their configuration; NewTarSumForLabel looks names up here.
	//
	// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
	//
	// NOTE(review): crypto.Hash.New panics unless the implementing package is
	// linked into the binary. This file imports crypto/sha256 only, so "sha512"
	// relies on crypto/sha512 being imported elsewhere — confirm.
	standardHashConfigs = map[string]tHashConfig{
		"sha256": {name: "sha256", hash: crypto.SHA256},
		"sha512": {name: "sha512", hash: crypto.SHA512},
	}
)
   148  
// DefaultTHash is default TarSum hashing algorithm - "sha256".
//
// NewTarSum uses it, and initialization falls back to it when no THash was
// supplied.
var DefaultTHash = NewTHash("sha256", sha256.New)
   151  
   152  type simpleTHash struct {
   153  	n string
   154  	h func() hash.Hash
   155  }
   156  
   157  func (sth simpleTHash) Name() string    { return sth.n }
   158  func (sth simpleTHash) Hash() hash.Hash { return sth.h() }
   159  
   160  func (ts *tarSum) encodeHeader(h *tar.Header) error {
   161  	for _, elem := range ts.headerSelector.selectHeaders(h) {
   162  		if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
   163  			return err
   164  		}
   165  	}
   166  	return nil
   167  }
   168  
   169  func (ts *tarSum) initTarSum() error {
   170  	ts.bufTar = bytes.NewBuffer([]byte{})
   171  	ts.bufWriter = bytes.NewBuffer([]byte{})
   172  	ts.tarR = tar.NewReader(ts.Reader)
   173  	ts.tarW = tar.NewWriter(ts.bufTar)
   174  	if !ts.DisableCompression {
   175  		ts.writer = gzip.NewWriter(ts.bufWriter)
   176  	} else {
   177  		ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
   178  	}
   179  	if ts.tHash == nil {
   180  		ts.tHash = DefaultTHash
   181  	}
   182  	ts.h = ts.tHash.Hash()
   183  	ts.h.Reset()
   184  	ts.first = true
   185  	ts.sums = FileInfoSums{}
   186  	return nil
   187  }
   188  
   189  func (ts *tarSum) Read(buf []byte) (int, error) {
   190  	if ts.finished {
   191  		return ts.bufWriter.Read(buf)
   192  	}
   193  	if len(ts.bufData) < len(buf) {
   194  		switch {
   195  		case len(buf) <= buf8K:
   196  			ts.bufData = make([]byte, buf8K)
   197  		case len(buf) <= buf16K:
   198  			ts.bufData = make([]byte, buf16K)
   199  		case len(buf) <= buf32K:
   200  			ts.bufData = make([]byte, buf32K)
   201  		default:
   202  			ts.bufData = make([]byte, len(buf))
   203  		}
   204  	}
   205  	buf2 := ts.bufData[:len(buf)]
   206  
   207  	n, err := ts.tarR.Read(buf2)
   208  	if err != nil {
   209  		if err == io.EOF {
   210  			if _, err := ts.h.Write(buf2[:n]); err != nil {
   211  				return 0, err
   212  			}
   213  			if !ts.first {
   214  				ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
   215  				ts.fileCounter++
   216  				ts.h.Reset()
   217  			} else {
   218  				ts.first = false
   219  			}
   220  
   221  			currentHeader, err := ts.tarR.Next()
   222  			if err != nil {
   223  				if err == io.EOF {
   224  					if err := ts.tarW.Close(); err != nil {
   225  						return 0, err
   226  					}
   227  					if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   228  						return 0, err
   229  					}
   230  					if err := ts.writer.Close(); err != nil {
   231  						return 0, err
   232  					}
   233  					ts.finished = true
   234  					return n, nil
   235  				}
   236  				return n, err
   237  			}
   238  			ts.currentFile = strings.TrimSuffix(strings.TrimPrefix(currentHeader.Name, "./"), "/")
   239  			if err := ts.encodeHeader(currentHeader); err != nil {
   240  				return 0, err
   241  			}
   242  			if err := ts.tarW.WriteHeader(currentHeader); err != nil {
   243  				return 0, err
   244  			}
   245  			if _, err := ts.tarW.Write(buf2[:n]); err != nil {
   246  				return 0, err
   247  			}
   248  			ts.tarW.Flush()
   249  			if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   250  				return 0, err
   251  			}
   252  			ts.writer.Flush()
   253  
   254  			return ts.bufWriter.Read(buf)
   255  		}
   256  		return n, err
   257  	}
   258  
   259  	// Filling the hash buffer
   260  	if _, err = ts.h.Write(buf2[:n]); err != nil {
   261  		return 0, err
   262  	}
   263  
   264  	// Filling the tar writer
   265  	if _, err = ts.tarW.Write(buf2[:n]); err != nil {
   266  		return 0, err
   267  	}
   268  	ts.tarW.Flush()
   269  
   270  	// Filling the output writer
   271  	if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
   272  		return 0, err
   273  	}
   274  	ts.writer.Flush()
   275  
   276  	return ts.bufWriter.Read(buf)
   277  }
   278  
   279  func (ts *tarSum) Sum(extra []byte) string {
   280  	ts.sums.SortBySums()
   281  	h := ts.tHash.Hash()
   282  	if extra != nil {
   283  		h.Write(extra)
   284  	}
   285  	for _, fis := range ts.sums {
   286  		h.Write([]byte(fis.Sum()))
   287  	}
   288  	checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
   289  	return checksum
   290  }
   291  
// GetSums returns the per-entry checksums collected so far. The internal value
// is returned directly, not a copy.
func (ts *tarSum) GetSums() FileInfoSums {
	return ts.sums
}