github.com/jandre/docker@v1.7.0/pkg/tarsum/tarsum.go (about)

     1  package tarsum
     2  
     3  import (
     4  	"archive/tar"
     5  	"bytes"
     6  	"compress/gzip"
     7  	"crypto"
     8  	"crypto/sha256"
     9  	"encoding/hex"
    10  	"errors"
    11  	"fmt"
    12  	"hash"
    13  	"io"
    14  	"strings"
    15  )
    16  
// Size classes for the scratch buffer allocated by (*tarSum).Read. When the
// caller's slice does not fit in the current scratch buffer, a new one is
// allocated at the smallest of these sizes that fits (or at the caller's
// exact length when it exceeds buf32K).
const (
	buf8K  = 8 * 1024
	buf16K = 16 * 1024
	buf32K = 32 * 1024
)
    22  
    23  // NewTarSum creates a new interface for calculating a fixed time checksum of a
    24  // tar archive.
    25  //
    26  // This is used for calculating checksums of layers of an image, in some cases
    27  // including the byte payload of the image's json metadata as well, and for
    28  // calculating the checksums for buildcache.
    29  func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
    30  	return NewTarSumHash(r, dc, v, DefaultTHash)
    31  }
    32  
    33  // Create a new TarSum, providing a THash to use rather than the DefaultTHash
    34  func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
    35  	headerSelector, err := getTarHeaderSelector(v)
    36  	if err != nil {
    37  		return nil, err
    38  	}
    39  	ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
    40  	err = ts.initTarSum()
    41  	return ts, err
    42  }
    43  
    44  // Create a new TarSum using the provided TarSum version+hash label.
    45  func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
    46  	parts := strings.SplitN(label, "+", 2)
    47  	if len(parts) != 2 {
    48  		return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")
    49  	}
    50  
    51  	versionName, hashName := parts[0], parts[1]
    52  
    53  	version, ok := tarSumVersionsByName[versionName]
    54  	if !ok {
    55  		return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)
    56  	}
    57  
    58  	hashConfig, ok := standardHashConfigs[hashName]
    59  	if !ok {
    60  		return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)
    61  	}
    62  
    63  	tHash := NewTHash(hashConfig.name, hashConfig.hash.New)
    64  
    65  	return NewTarSumHash(r, disableCompression, version, tHash)
    66  }
    67  
// TarSum is the generic interface for calculating fixed time
// checksums of a tar archive.
//
// The descriptions below reflect the tarSum implementation in this file.
type TarSum interface {
	// Read streams the archive through the TarSum; checksums are accumulated
	// as a side effect of reading.
	io.Reader
	// GetSums returns the per-entry checksums recorded so far.
	GetSums() FileInfoSums
	// Sum returns the overall checksum string, formatted as
	// "<version>+<hash name>:<hex digest>". A non-nil argument is hashed
	// ahead of the per-entry sums.
	Sum([]byte) string
	// Version returns the TarSum version in use.
	Version() Version
	// Hash returns the THash in use.
	Hash() THash
}
    77  
// tarSum is the TarSum implementation constructed by NewTarSumHash. It reads
// a tar archive from the embedded Reader, hashes each entry, and re-emits the
// archive (optionally gzip compressed) through its own Read method.
type tarSum struct {
	// Source of the tar archive being summed.
	io.Reader
	// tarR reads entries from the embedded Reader.
	tarR               *tar.Reader
	// tarW re-writes the archive into bufTar.
	tarW               *tar.Writer
	// writer receives the contents of bufTar and writes into bufWriter; it is
	// a gzip writer unless DisableCompression is set.
	writer             writeCloseFlusher
	// bufTar stages the output of tarW.
	bufTar             *bytes.Buffer
	// bufWriter holds the output handed back to callers of Read.
	bufWriter          *bytes.Buffer
	// bufData is the scratch buffer reused between Read calls.
	bufData            []byte
	// h is the running hash of the current entry (selected headers, then payload).
	h                  hash.Hash
	// tHash produces h and the hash used by Sum.
	tHash              THash
	// sums collects one checksum per completed entry.
	sums               FileInfoSums
	// fileCounter is the position assigned to the next entry recorded in sums.
	fileCounter        int64
	// currentFile is the entry name with a leading "./" and trailing "/" removed.
	currentFile        string
	// finished is set once the source archive has been fully consumed.
	finished           bool
	// first is true until Read has requested the first header.
	first              bool
	DisableCompression bool              // false by default. When false, the output is gzip compressed.
	tarSumVersion      Version           // this field is not exported so it can not be mutated during use
	headerSelector     tarHeaderSelector // handles selecting and ordering headers for files in the archive
}
    98  
    99  func (ts tarSum) Hash() THash {
   100  	return ts.tHash
   101  }
   102  
   103  func (ts tarSum) Version() Version {
   104  	return ts.tarSumVersion
   105  }
   106  
// THash is a hash.Hash generator together with the name of the hash it
// generates.
type THash interface {
	// Hash returns a hash.Hash to compute checksums with.
	Hash() hash.Hash
	// Name returns the name of the hash; it is embedded in the checksum
	// string produced by TarSum.Sum.
	Name() string
}
   112  
   113  // Convenience method for creating a THash
   114  func NewTHash(name string, h func() hash.Hash) THash {
   115  	return simpleTHash{n: name, h: h}
   116  }
   117  
// tHashConfig pairs a hash name with the crypto.Hash identifier used to
// construct that hash.
type tHashConfig struct {
	// name is the hash name passed to NewTHash.
	name string
	// hash supplies the constructor (its New method) passed to NewTHash.
	hash crypto.Hash
}
   122  
var (
	// standardHashConfigs maps the hash names accepted in a TarSum label
	// (see NewTarSumForLabel) to their configuration.
	//
	// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
	//
	// NOTE(review): a crypto.Hash can only be instantiated when the package
	// implementing it is linked into the binary. This file imports
	// crypto/sha256 but not crypto/sha512 — confirm that something else in
	// the program imports crypto/sha512 before relying on "sha512".
	standardHashConfigs = map[string]tHashConfig{
		"sha256": {name: "sha256", hash: crypto.SHA256},
		"sha512": {name: "sha512", hash: crypto.SHA512},
	}
)
   130  
// DefaultTHash is the THash used when no other is supplied (by NewTarSum, or
// when a nil THash reaches initialization). It hashes with "sha256".
var DefaultTHash = NewTHash("sha256", sha256.New)
   133  
   134  type simpleTHash struct {
   135  	n string
   136  	h func() hash.Hash
   137  }
   138  
   139  func (sth simpleTHash) Name() string    { return sth.n }
   140  func (sth simpleTHash) Hash() hash.Hash { return sth.h() }
   141  
   142  func (ts *tarSum) encodeHeader(h *tar.Header) error {
   143  	for _, elem := range ts.headerSelector.selectHeaders(h) {
   144  		if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
   145  			return err
   146  		}
   147  	}
   148  	return nil
   149  }
   150  
   151  func (ts *tarSum) initTarSum() error {
   152  	ts.bufTar = bytes.NewBuffer([]byte{})
   153  	ts.bufWriter = bytes.NewBuffer([]byte{})
   154  	ts.tarR = tar.NewReader(ts.Reader)
   155  	ts.tarW = tar.NewWriter(ts.bufTar)
   156  	if !ts.DisableCompression {
   157  		ts.writer = gzip.NewWriter(ts.bufWriter)
   158  	} else {
   159  		ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
   160  	}
   161  	if ts.tHash == nil {
   162  		ts.tHash = DefaultTHash
   163  	}
   164  	ts.h = ts.tHash.Hash()
   165  	ts.h.Reset()
   166  	ts.first = true
   167  	ts.sums = FileInfoSums{}
   168  	return nil
   169  }
   170  
   171  func (ts *tarSum) Read(buf []byte) (int, error) {
   172  	if ts.finished {
   173  		return ts.bufWriter.Read(buf)
   174  	}
   175  	if len(ts.bufData) < len(buf) {
   176  		switch {
   177  		case len(buf) <= buf8K:
   178  			ts.bufData = make([]byte, buf8K)
   179  		case len(buf) <= buf16K:
   180  			ts.bufData = make([]byte, buf16K)
   181  		case len(buf) <= buf32K:
   182  			ts.bufData = make([]byte, buf32K)
   183  		default:
   184  			ts.bufData = make([]byte, len(buf))
   185  		}
   186  	}
   187  	buf2 := ts.bufData[:len(buf)]
   188  
   189  	n, err := ts.tarR.Read(buf2)
   190  	if err != nil {
   191  		if err == io.EOF {
   192  			if _, err := ts.h.Write(buf2[:n]); err != nil {
   193  				return 0, err
   194  			}
   195  			if !ts.first {
   196  				ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
   197  				ts.fileCounter++
   198  				ts.h.Reset()
   199  			} else {
   200  				ts.first = false
   201  			}
   202  
   203  			currentHeader, err := ts.tarR.Next()
   204  			if err != nil {
   205  				if err == io.EOF {
   206  					if err := ts.tarW.Close(); err != nil {
   207  						return 0, err
   208  					}
   209  					if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   210  						return 0, err
   211  					}
   212  					if err := ts.writer.Close(); err != nil {
   213  						return 0, err
   214  					}
   215  					ts.finished = true
   216  					return n, nil
   217  				}
   218  				return n, err
   219  			}
   220  			ts.currentFile = strings.TrimSuffix(strings.TrimPrefix(currentHeader.Name, "./"), "/")
   221  			if err := ts.encodeHeader(currentHeader); err != nil {
   222  				return 0, err
   223  			}
   224  			if err := ts.tarW.WriteHeader(currentHeader); err != nil {
   225  				return 0, err
   226  			}
   227  			if _, err := ts.tarW.Write(buf2[:n]); err != nil {
   228  				return 0, err
   229  			}
   230  			ts.tarW.Flush()
   231  			if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
   232  				return 0, err
   233  			}
   234  			ts.writer.Flush()
   235  
   236  			return ts.bufWriter.Read(buf)
   237  		}
   238  		return n, err
   239  	}
   240  
   241  	// Filling the hash buffer
   242  	if _, err = ts.h.Write(buf2[:n]); err != nil {
   243  		return 0, err
   244  	}
   245  
   246  	// Filling the tar writter
   247  	if _, err = ts.tarW.Write(buf2[:n]); err != nil {
   248  		return 0, err
   249  	}
   250  	ts.tarW.Flush()
   251  
   252  	// Filling the output writer
   253  	if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
   254  		return 0, err
   255  	}
   256  	ts.writer.Flush()
   257  
   258  	return ts.bufWriter.Read(buf)
   259  }
   260  
   261  func (ts *tarSum) Sum(extra []byte) string {
   262  	ts.sums.SortBySums()
   263  	h := ts.tHash.Hash()
   264  	if extra != nil {
   265  		h.Write(extra)
   266  	}
   267  	for _, fis := range ts.sums {
   268  		h.Write([]byte(fis.Sum()))
   269  	}
   270  	checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
   271  	return checksum
   272  }
   273  
   274  func (ts *tarSum) GetSums() FileInfoSums {
   275  	return ts.sums
   276  }