github.com/demonoid81/moby@v0.0.0-20200517203328-62dd8e17c460/pkg/tarsum/tarsum.go (about) 1 // Package tarsum provides algorithms to perform checksum calculation on 2 // filesystem layers. 3 // 4 // The transportation of filesystems, regarding Docker, is done with tar(1) 5 // archives. There are a variety of tar serialization formats [2], and a key 6 // concern here is ensuring a repeatable checksum given a set of inputs from a 7 // generic tar archive. Types of transportation include distribution to and from a 8 // registry endpoint, saving and loading through commands or Docker daemon APIs, 9 // transferring the build context from client to Docker daemon, and committing the 10 // filesystem of a container to become an image. 11 // 12 // As tar archives are used for transit, but not preserved in many situations, the 13 // focus of the algorithm is to ensure the integrity of the preserved filesystem, 14 // while maintaining a deterministic accountability. This includes neither 15 // constraining the ordering or manipulation of the files during the creation or 16 // unpacking of the archive, nor include additional metadata state about the file 17 // system attributes. 18 package tarsum // import "github.com/demonoid81/moby/pkg/tarsum" 19 20 import ( 21 "archive/tar" 22 "bytes" 23 "compress/gzip" 24 "crypto" 25 "crypto/sha256" 26 "encoding/hex" 27 "errors" 28 "fmt" 29 "hash" 30 "io" 31 "path" 32 "strings" 33 ) 34 35 const ( 36 buf8K = 8 * 1024 37 buf16K = 16 * 1024 38 buf32K = 32 * 1024 39 ) 40 41 // NewTarSum creates a new interface for calculating a fixed time checksum of a 42 // tar archive. 43 // 44 // This is used for calculating checksums of layers of an image, in some cases 45 // including the byte payload of the image's json metadata as well, and for 46 // calculating the checksums for buildcache. 47 func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) { 48 return NewTarSumHash(r, dc, v, DefaultTHash) 49 } 50 51 // NewTarSumHash creates a new TarSum, providing a THash to use rather than 52 // the DefaultTHash. 53 func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) { 54 headerSelector, err := getTarHeaderSelector(v) 55 if err != nil { 56 return nil, err 57 } 58 ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash} 59 err = ts.initTarSum() 60 return ts, err 61 } 62 63 // NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label. 64 func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) { 65 parts := strings.SplitN(label, "+", 2) 66 if len(parts) != 2 { 67 return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}") 68 } 69 70 versionName, hashName := parts[0], parts[1] 71 72 version, ok := tarSumVersionsByName[versionName] 73 if !ok { 74 return nil, fmt.Errorf("unknown TarSum version name: %q", versionName) 75 } 76 77 hashConfig, ok := standardHashConfigs[hashName] 78 if !ok { 79 return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName) 80 } 81 82 tHash := NewTHash(hashConfig.name, hashConfig.hash.New) 83 84 return NewTarSumHash(r, disableCompression, version, tHash) 85 } 86 87 // TarSum is the generic interface for calculating fixed time 88 // checksums of a tar archive. 89 type TarSum interface { 90 io.Reader 91 GetSums() FileInfoSums 92 Sum([]byte) string 93 Version() Version 94 Hash() THash 95 } 96 97 // tarSum struct is the structure for a Version0 checksum calculation. 98 type tarSum struct { 99 io.Reader 100 tarR *tar.Reader 101 tarW *tar.Writer 102 writer writeCloseFlusher 103 bufTar *bytes.Buffer 104 bufWriter *bytes.Buffer 105 bufData []byte 106 h hash.Hash 107 tHash THash 108 sums FileInfoSums 109 fileCounter int64 110 currentFile string 111 finished bool 112 first bool 113 DisableCompression bool // false by default. When false, the output gzip compressed. 114 tarSumVersion Version // this field is not exported so it can not be mutated during use 115 headerSelector tarHeaderSelector // handles selecting and ordering headers for files in the archive 116 } 117 118 func (ts tarSum) Hash() THash { 119 return ts.tHash 120 } 121 122 func (ts tarSum) Version() Version { 123 return ts.tarSumVersion 124 } 125 126 // THash provides a hash.Hash type generator and its name. 127 type THash interface { 128 Hash() hash.Hash 129 Name() string 130 } 131 132 // NewTHash is a convenience method for creating a THash. 133 func NewTHash(name string, h func() hash.Hash) THash { 134 return simpleTHash{n: name, h: h} 135 } 136 137 type tHashConfig struct { 138 name string 139 hash crypto.Hash 140 } 141 142 var ( 143 // NOTE: DO NOT include MD5 or SHA1, which are considered insecure. 144 standardHashConfigs = map[string]tHashConfig{ 145 "sha256": {name: "sha256", hash: crypto.SHA256}, 146 "sha512": {name: "sha512", hash: crypto.SHA512}, 147 } 148 ) 149 150 // DefaultTHash is default TarSum hashing algorithm - "sha256". 151 var DefaultTHash = NewTHash("sha256", sha256.New) 152 153 type simpleTHash struct { 154 n string 155 h func() hash.Hash 156 } 157 158 func (sth simpleTHash) Name() string { return sth.n } 159 func (sth simpleTHash) Hash() hash.Hash { return sth.h() } 160 161 func (ts *tarSum) encodeHeader(h *tar.Header) error { 162 for _, elem := range ts.headerSelector.selectHeaders(h) { 163 // Ignore these headers to be compatible with versions 164 // before go 1.10 165 if elem[0] == "gname" || elem[0] == "uname" { 166 elem[1] = "" 167 } 168 if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil { 169 return err 170 } 171 } 172 return nil 173 } 174 175 func (ts *tarSum) initTarSum() error { 176 ts.bufTar = bytes.NewBuffer([]byte{}) 177 ts.bufWriter = bytes.NewBuffer([]byte{}) 178 ts.tarR = tar.NewReader(ts.Reader) 179 ts.tarW = tar.NewWriter(ts.bufTar) 180 if !ts.DisableCompression { 181 ts.writer = gzip.NewWriter(ts.bufWriter) 182 } else { 183 ts.writer = &nopCloseFlusher{Writer: ts.bufWriter} 184 } 185 if ts.tHash == nil { 186 ts.tHash = DefaultTHash 187 } 188 ts.h = ts.tHash.Hash() 189 ts.h.Reset() 190 ts.first = true 191 ts.sums = FileInfoSums{} 192 return nil 193 } 194 195 func (ts *tarSum) Read(buf []byte) (int, error) { 196 if ts.finished { 197 return ts.bufWriter.Read(buf) 198 } 199 if len(ts.bufData) < len(buf) { 200 switch { 201 case len(buf) <= buf8K: 202 ts.bufData = make([]byte, buf8K) 203 case len(buf) <= buf16K: 204 ts.bufData = make([]byte, buf16K) 205 case len(buf) <= buf32K: 206 ts.bufData = make([]byte, buf32K) 207 default: 208 ts.bufData = make([]byte, len(buf)) 209 } 210 } 211 buf2 := ts.bufData[:len(buf)] 212 213 n, err := ts.tarR.Read(buf2) 214 if err != nil { 215 if err == io.EOF { 216 if _, err := ts.h.Write(buf2[:n]); err != nil { 217 return 0, err 218 } 219 if !ts.first { 220 ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter}) 221 ts.fileCounter++ 222 ts.h.Reset() 223 } else { 224 ts.first = false 225 } 226 227 if _, err := ts.tarW.Write(buf2[:n]); err != nil { 228 return 0, err 229 } 230 231 currentHeader, err := ts.tarR.Next() 232 if err != nil { 233 if err == io.EOF { 234 if err := ts.tarW.Close(); err != nil { 235 return 0, err 236 } 237 if _, err := io.Copy(ts.writer, ts.bufTar); err != nil { 238 return 0, err 239 } 240 if err := ts.writer.Close(); err != nil { 241 return 0, err 242 } 243 ts.finished = true 244 return ts.bufWriter.Read(buf) 245 } 246 return 0, err 247 } 248 249 ts.currentFile = path.Join(".", path.Join("/", currentHeader.Name)) 250 if err := ts.encodeHeader(currentHeader); err != nil { 251 return 0, err 252 } 253 if err := ts.tarW.WriteHeader(currentHeader); err != nil { 254 return 0, err 255 } 256 257 if _, err := io.Copy(ts.writer, ts.bufTar); err != nil { 258 return 0, err 259 } 260 ts.writer.Flush() 261 262 return ts.bufWriter.Read(buf) 263 } 264 return 0, err 265 } 266 267 // Filling the hash buffer 268 if _, err = ts.h.Write(buf2[:n]); err != nil { 269 return 0, err 270 } 271 272 // Filling the tar writer 273 if _, err = ts.tarW.Write(buf2[:n]); err != nil { 274 return 0, err 275 } 276 277 // Filling the output writer 278 if _, err = io.Copy(ts.writer, ts.bufTar); err != nil { 279 return 0, err 280 } 281 ts.writer.Flush() 282 283 return ts.bufWriter.Read(buf) 284 } 285 286 func (ts *tarSum) Sum(extra []byte) string { 287 ts.sums.SortBySums() 288 h := ts.tHash.Hash() 289 if extra != nil { 290 h.Write(extra) 291 } 292 for _, fis := range ts.sums { 293 h.Write([]byte(fis.Sum())) 294 } 295 checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil)) 296 return checksum 297 } 298 299 func (ts *tarSum) GetSums() FileInfoSums { 300 return ts.sums 301 }