github.com/sijibomii/docker@v0.0.0-20231230191044-5cf6ca554647/pkg/tarsum/tarsum.go (about) 1 // Package tarsum provides algorithms to perform checksum calculation on 2 // filesystem layers. 3 // 4 // The transportation of filesystems, regarding Docker, is done with tar(1) 5 // archives. There are a variety of tar serialization formats [2], and a key 6 // concern here is ensuring a repeatable checksum given a set of inputs from a 7 // generic tar archive. Types of transportation include distribution to and from a 8 // registry endpoint, saving and loading through commands or Docker daemon APIs, 9 // transferring the build context from client to Docker daemon, and committing the 10 // filesystem of a container to become an image. 11 // 12 // As tar archives are used for transit, but not preserved in many situations, the 13 // focus of the algorithm is to ensure the integrity of the preserved filesystem, 14 // while maintaining a deterministic accountability. This includes neither 15 // constraining the ordering or manipulation of the files during the creation or 16 // unpacking of the archive, nor include additional metadata state about the file 17 // system attributes. 18 package tarsum 19 20 import ( 21 "archive/tar" 22 "bytes" 23 "compress/gzip" 24 "crypto" 25 "crypto/sha256" 26 "encoding/hex" 27 "errors" 28 "fmt" 29 "hash" 30 "io" 31 "strings" 32 ) 33 34 const ( 35 buf8K = 8 * 1024 36 buf16K = 16 * 1024 37 buf32K = 32 * 1024 38 ) 39 40 // NewTarSum creates a new interface for calculating a fixed time checksum of a 41 // tar archive. 42 // 43 // This is used for calculating checksums of layers of an image, in some cases 44 // including the byte payload of the image's json metadata as well, and for 45 // calculating the checksums for buildcache. 46 func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) { 47 return NewTarSumHash(r, dc, v, DefaultTHash) 48 } 49 50 // NewTarSumHash creates a new TarSum, providing a THash to use rather than 51 // the DefaultTHash. 52 func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) { 53 headerSelector, err := getTarHeaderSelector(v) 54 if err != nil { 55 return nil, err 56 } 57 ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash} 58 err = ts.initTarSum() 59 return ts, err 60 } 61 62 // NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label. 63 func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) { 64 parts := strings.SplitN(label, "+", 2) 65 if len(parts) != 2 { 66 return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}") 67 } 68 69 versionName, hashName := parts[0], parts[1] 70 71 version, ok := tarSumVersionsByName[versionName] 72 if !ok { 73 return nil, fmt.Errorf("unknown TarSum version name: %q", versionName) 74 } 75 76 hashConfig, ok := standardHashConfigs[hashName] 77 if !ok { 78 return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName) 79 } 80 81 tHash := NewTHash(hashConfig.name, hashConfig.hash.New) 82 83 return NewTarSumHash(r, disableCompression, version, tHash) 84 } 85 86 // TarSum is the generic interface for calculating fixed time 87 // checksums of a tar archive. 88 type TarSum interface { 89 io.Reader 90 GetSums() FileInfoSums 91 Sum([]byte) string 92 Version() Version 93 Hash() THash 94 } 95 96 // tarSum struct is the structure for a Version0 checksum calculation. 97 type tarSum struct { 98 io.Reader 99 tarR *tar.Reader 100 tarW *tar.Writer 101 writer writeCloseFlusher 102 bufTar *bytes.Buffer 103 bufWriter *bytes.Buffer 104 bufData []byte 105 h hash.Hash 106 tHash THash 107 sums FileInfoSums 108 fileCounter int64 109 currentFile string 110 finished bool 111 first bool 112 DisableCompression bool // false by default. When false, the output gzip compressed. 113 tarSumVersion Version // this field is not exported so it can not be mutated during use 114 headerSelector tarHeaderSelector // handles selecting and ordering headers for files in the archive 115 } 116 117 func (ts tarSum) Hash() THash { 118 return ts.tHash 119 } 120 121 func (ts tarSum) Version() Version { 122 return ts.tarSumVersion 123 } 124 125 // THash provides a hash.Hash type generator and its name. 126 type THash interface { 127 Hash() hash.Hash 128 Name() string 129 } 130 131 // NewTHash is a convenience method for creating a THash. 132 func NewTHash(name string, h func() hash.Hash) THash { 133 return simpleTHash{n: name, h: h} 134 } 135 136 type tHashConfig struct { 137 name string 138 hash crypto.Hash 139 } 140 141 var ( 142 // NOTE: DO NOT include MD5 or SHA1, which are considered insecure. 143 standardHashConfigs = map[string]tHashConfig{ 144 "sha256": {name: "sha256", hash: crypto.SHA256}, 145 "sha512": {name: "sha512", hash: crypto.SHA512}, 146 } 147 ) 148 149 // DefaultTHash is default TarSum hashing algorithm - "sha256". 150 var DefaultTHash = NewTHash("sha256", sha256.New) 151 152 type simpleTHash struct { 153 n string 154 h func() hash.Hash 155 } 156 157 func (sth simpleTHash) Name() string { return sth.n } 158 func (sth simpleTHash) Hash() hash.Hash { return sth.h() } 159 160 func (ts *tarSum) encodeHeader(h *tar.Header) error { 161 for _, elem := range ts.headerSelector.selectHeaders(h) { 162 if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil { 163 return err 164 } 165 } 166 return nil 167 } 168 169 func (ts *tarSum) initTarSum() error { 170 ts.bufTar = bytes.NewBuffer([]byte{}) 171 ts.bufWriter = bytes.NewBuffer([]byte{}) 172 ts.tarR = tar.NewReader(ts.Reader) 173 ts.tarW = tar.NewWriter(ts.bufTar) 174 if !ts.DisableCompression { 175 ts.writer = gzip.NewWriter(ts.bufWriter) 176 } else { 177 ts.writer = &nopCloseFlusher{Writer: ts.bufWriter} 178 } 179 if ts.tHash == nil { 180 ts.tHash = DefaultTHash 181 } 182 ts.h = ts.tHash.Hash() 183 ts.h.Reset() 184 ts.first = true 185 ts.sums = FileInfoSums{} 186 return nil 187 } 188 189 func (ts *tarSum) Read(buf []byte) (int, error) { 190 if ts.finished { 191 return ts.bufWriter.Read(buf) 192 } 193 if len(ts.bufData) < len(buf) { 194 switch { 195 case len(buf) <= buf8K: 196 ts.bufData = make([]byte, buf8K) 197 case len(buf) <= buf16K: 198 ts.bufData = make([]byte, buf16K) 199 case len(buf) <= buf32K: 200 ts.bufData = make([]byte, buf32K) 201 default: 202 ts.bufData = make([]byte, len(buf)) 203 } 204 } 205 buf2 := ts.bufData[:len(buf)] 206 207 n, err := ts.tarR.Read(buf2) 208 if err != nil { 209 if err == io.EOF { 210 if _, err := ts.h.Write(buf2[:n]); err != nil { 211 return 0, err 212 } 213 if !ts.first { 214 ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter}) 215 ts.fileCounter++ 216 ts.h.Reset() 217 } else { 218 ts.first = false 219 } 220 221 currentHeader, err := ts.tarR.Next() 222 if err != nil { 223 if err == io.EOF { 224 if err := ts.tarW.Close(); err != nil { 225 return 0, err 226 } 227 if _, err := io.Copy(ts.writer, ts.bufTar); err != nil { 228 return 0, err 229 } 230 if err := ts.writer.Close(); err != nil { 231 return 0, err 232 } 233 ts.finished = true 234 return n, nil 235 } 236 return n, err 237 } 238 ts.currentFile = strings.TrimSuffix(strings.TrimPrefix(currentHeader.Name, "./"), "/") 239 if err := ts.encodeHeader(currentHeader); err != nil { 240 return 0, err 241 } 242 if err := ts.tarW.WriteHeader(currentHeader); err != nil { 243 return 0, err 244 } 245 if _, err := ts.tarW.Write(buf2[:n]); err != nil { 246 return 0, err 247 } 248 ts.tarW.Flush() 249 if _, err := io.Copy(ts.writer, ts.bufTar); err != nil { 250 return 0, err 251 } 252 ts.writer.Flush() 253 254 return ts.bufWriter.Read(buf) 255 } 256 return n, err 257 } 258 259 // Filling the hash buffer 260 if _, err = ts.h.Write(buf2[:n]); err != nil { 261 return 0, err 262 } 263 264 // Filling the tar writer 265 if _, err = ts.tarW.Write(buf2[:n]); err != nil { 266 return 0, err 267 } 268 ts.tarW.Flush() 269 270 // Filling the output writer 271 if _, err = io.Copy(ts.writer, ts.bufTar); err != nil { 272 return 0, err 273 } 274 ts.writer.Flush() 275 276 return ts.bufWriter.Read(buf) 277 } 278 279 func (ts *tarSum) Sum(extra []byte) string { 280 ts.sums.SortBySums() 281 h := ts.tHash.Hash() 282 if extra != nil { 283 h.Write(extra) 284 } 285 for _, fis := range ts.sums { 286 h.Write([]byte(fis.Sum())) 287 } 288 checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil)) 289 return checksum 290 } 291 292 func (ts *tarSum) GetSums() FileInfoSums { 293 return ts.sums 294 }