github.com/lusis/distribution@v2.0.1+incompatible/registry/storage/layerwriter.go

package storage

import (
	"fmt"
	"io"
	"os"
	"path"
	"strconv"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/docker/distribution"
	ctxu "github.com/docker/distribution/context"
	"github.com/docker/distribution/digest"
	storagedriver "github.com/docker/distribution/registry/storage/driver"
)

var _ distribution.LayerUpload = &layerWriter{}

// layerWriter is used to control the various aspects of resumable
// layer upload. It implements the LayerUpload interface.
type layerWriter struct {
	layerStore *layerStore

	uuid              string
	startedAt         time.Time
	resumableDigester digest.ResumableDigester

	// implements io.WriteSeeker, io.ReaderFrom and io.Closer to satisfy
	// the LayerUpload interface
	bufferedFileWriter
}

// UUID returns the identifier for this upload.
func (lw *layerWriter) UUID() string {
	return lw.uuid
}

// StartedAt returns the time at which this upload began.
func (lw *layerWriter) StartedAt() time.Time {
	return lw.startedAt
}

// Finish marks the upload as completed, returning a valid handle to the
// uploaded layer. The final size and checksum are validated against the
// contents of the uploaded layer. The checksum should be provided in the
// format <algorithm>:<hex digest>.
func (lw *layerWriter) Finish(dgst digest.Digest) (distribution.Layer, error) {
	ctxu.GetLogger(lw.layerStore.repository.ctx).Debug("(*layerWriter).Finish")

	if err := lw.bufferedFileWriter.Close(); err != nil {
		return nil, err
	}

	var (
		canonical digest.Digest
		err       error
	)

	// HACK(stevvooe): To deal with s3's lack of consistency, attempt to retry
	// validation on failure. Three retries are made, backing off
	// (retries+1)*100ms each time.
	for retries := 0; ; retries++ {
		canonical, err = lw.validateLayer(dgst)
		if err == nil {
			break
		}

		ctxu.GetLoggerWithField(lw.layerStore.repository.ctx, "retries", retries).
			Errorf("error validating layer: %v", err)

		if retries < 3 {
			time.Sleep(100 * time.Millisecond * time.Duration(retries+1))
			continue
		}

		return nil, err
	}

	if err := lw.moveLayer(canonical); err != nil {
		// TODO(stevvooe): Cleanup?
		return nil, err
	}

	// Link the layer blob into the repository.
	if err := lw.linkLayer(canonical, dgst); err != nil {
		return nil, err
	}

	if err := lw.removeResources(); err != nil {
		return nil, err
	}

	return lw.layerStore.Fetch(canonical)
}

// Cancel the layer upload process.
func (lw *layerWriter) Cancel() error {
	ctxu.GetLogger(lw.layerStore.repository.ctx).Debug("(*layerWriter).Cancel")
	if err := lw.removeResources(); err != nil {
		return err
	}

	lw.Close()
	return nil
}

func (lw *layerWriter) Write(p []byte) (int, error) {
	if lw.resumableDigester == nil {
		return lw.bufferedFileWriter.Write(p)
	}

	// Ensure that the current write offset matches how many bytes have been
	// written to the digester. If not, we need to update the digest state to
	// match the current write position.
	if err := lw.resumeHashAt(lw.offset); err != nil {
		return 0, err
	}

	return io.MultiWriter(&lw.bufferedFileWriter, lw.resumableDigester).Write(p)
}
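// exampleHashingWrite is an illustrative sketch, not part of the original
// file: it isolates the one-pass pattern Write relies on, where io.MultiWriter
// sends each byte to both the backing file writer and the running digest, so
// the digester's length always tracks the write offset. The function name and
// parameters are hypothetical.
func exampleHashingWrite(file, digester io.Writer, p []byte) (int, error) {
	// Every byte that lands in the file also updates the hash state in the
	// same call, so no separate re-hashing pass is needed on the happy path.
	return io.MultiWriter(file, digester).Write(p)
}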
func (lw *layerWriter) ReadFrom(r io.Reader) (n int64, err error) {
	if lw.resumableDigester == nil {
		return lw.bufferedFileWriter.ReadFrom(r)
	}

	// Ensure that the current write offset matches how many bytes have been
	// written to the digester. If not, we need to update the digest state to
	// match the current write position.
	if err := lw.resumeHashAt(lw.offset); err != nil {
		return 0, err
	}

	return lw.bufferedFileWriter.ReadFrom(io.TeeReader(r, lw.resumableDigester))
}

func (lw *layerWriter) Close() error {
	if lw.err != nil {
		return lw.err
	}

	if lw.resumableDigester != nil {
		if err := lw.storeHashState(); err != nil {
			return err
		}
	}

	return lw.bufferedFileWriter.Close()
}

// hashStateEntry associates a stored hash state with the upload offset at
// which it was captured.
type hashStateEntry struct {
	offset int64
	path   string
}

// getStoredHashStates returns a slice of hashStateEntries for this upload.
func (lw *layerWriter) getStoredHashStates() ([]hashStateEntry, error) {
	uploadHashStatePathPrefix, err := lw.layerStore.repository.pm.path(uploadHashStatePathSpec{
		name: lw.layerStore.repository.Name(),
		uuid: lw.uuid,
		alg:  lw.resumableDigester.Digest().Algorithm(),
		list: true,
	})
	if err != nil {
		return nil, err
	}

	paths, err := lw.driver.List(uploadHashStatePathPrefix)
	if err != nil {
		if _, ok := err.(storagedriver.PathNotFoundError); !ok {
			return nil, err
		}
		// Treat PathNotFoundError as no entries.
		paths = nil
	}

	hashStateEntries := make([]hashStateEntry, 0, len(paths))

	for _, p := range paths {
		pathSuffix := path.Base(p)
		// The suffix should be the offset.
		offset, err := strconv.ParseInt(pathSuffix, 0, 64)
		if err != nil {
			logrus.Errorf("unable to parse offset from upload state path %q: %s", p, err)
			// Skip the malformed entry rather than recording a bogus zero offset.
			continue
		}

		hashStateEntries = append(hashStateEntries, hashStateEntry{offset: offset, path: p})
	}

	return hashStateEntries, nil
}
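// exampleStateOffset is an illustrative sketch, not part of the original
// file: stored hash states are listed as paths whose final element is the
// byte offset at which the state was captured, so recovering the offset is a
// ParseInt over path.Base, exactly as getStoredHashStates does above. The
// function name is hypothetical.
func exampleStateOffset(statePath string) (int64, error) {
	// e.g. a state path ending in ".../1048576" yields offset 1048576.
	return strconv.ParseInt(path.Base(statePath), 0, 64)
}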
// resumeHashAt attempts to restore the state of the internal hash function
// by loading the most recent saved hash state less than or equal to the given
// offset. Any remaining unhashed bytes below the given offset are then hashed
// from the content uploaded so far.
func (lw *layerWriter) resumeHashAt(offset int64) error {
	if offset < 0 {
		return fmt.Errorf("cannot resume hash at negative offset: %d", offset)
	}

	if offset == int64(lw.resumableDigester.Len()) {
		// State of digester is already at the requested offset.
		return nil
	}

	// List hash states from storage backend.
	var hashStateMatch hashStateEntry
	hashStates, err := lw.getStoredHashStates()
	if err != nil {
		return fmt.Errorf("unable to get stored hash states with offset %d: %s", offset, err)
	}

	// Find the highest stored hashState with offset less than or equal to
	// the requested offset.
	for _, hashState := range hashStates {
		if hashState.offset == offset {
			hashStateMatch = hashState
			break // Found an exact offset match.
		} else if hashState.offset < offset && hashState.offset > hashStateMatch.offset {
			// This offset is closer to the requested offset.
			hashStateMatch = hashState
		} else if hashState.offset > offset {
			// Remove any stored hash state with offsets higher than this one
			// as writes to this resumed hasher will make those invalid. This
			// is probably okay to skip for now since we don't expect anyone to
			// use the API in this way. For that reason, we don't treat an
			// error here as fatal, but only log it.
			if err := lw.driver.Delete(hashState.path); err != nil {
				logrus.Errorf("unable to delete stale hash state %q: %s", hashState.path, err)
			}
		}
	}

	if hashStateMatch.offset == 0 {
		// No need to load any state, just reset the hasher.
		lw.resumableDigester.Reset()
	} else {
		storedState, err := lw.driver.GetContent(hashStateMatch.path)
		if err != nil {
			return err
		}

		if err = lw.resumableDigester.Restore(storedState); err != nil {
			return err
		}
	}

	// Mind the gap.
	if gapLen := offset - int64(lw.resumableDigester.Len()); gapLen > 0 {
		// Need to read content from the upload to catch up to the desired
		// offset.
		fr, err := newFileReader(lw.driver, lw.path)
		if err != nil {
			return err
		}

		if _, err = fr.Seek(int64(lw.resumableDigester.Len()), os.SEEK_SET); err != nil {
			return fmt.Errorf("unable to seek to layer reader offset %d: %s", lw.resumableDigester.Len(), err)
		}

		if _, err := io.CopyN(lw.resumableDigester, fr, gapLen); err != nil {
			return err
		}
	}

	return nil
}

// storeHashState serializes the digester's current state and writes it to the
// backend under a path keyed by algorithm and offset.
func (lw *layerWriter) storeHashState() error {
	uploadHashStatePath, err := lw.layerStore.repository.pm.path(uploadHashStatePathSpec{
		name:   lw.layerStore.repository.Name(),
		uuid:   lw.uuid,
		alg:    lw.resumableDigester.Digest().Algorithm(),
		offset: int64(lw.resumableDigester.Len()),
	})
	if err != nil {
		return err
	}

	hashState, err := lw.resumableDigester.State()
	if err != nil {
		return err
	}

	return lw.driver.PutContent(uploadHashStatePath, hashState)
}
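// exampleCatchUpHash is an illustrative sketch, not part of the original
// file: it isolates the "mind the gap" step of resumeHashAt. After restoring
// a saved state, the digester may still trail the requested offset, so the
// uploaded content is re-read from the digester's current length up to that
// offset. The function name and parameters are hypothetical.
func exampleCatchUpHash(d digest.ResumableDigester, content io.ReadSeeker, offset int64) error {
	gapLen := offset - int64(d.Len())
	if gapLen <= 0 {
		return nil // already caught up; a negative gap is the caller's problem
	}
	if _, err := content.Seek(int64(d.Len()), os.SEEK_SET); err != nil {
		return err
	}
	// Hash exactly the missing bytes so d.Len() lands on the requested offset.
	_, err := io.CopyN(d, content, gapLen)
	return err
}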
// validateLayer checks the layer data against the digest, returning an error
// if it does not match. The canonical digest is returned.
func (lw *layerWriter) validateLayer(dgst digest.Digest) (digest.Digest, error) {
	var (
		verified, fullHash bool
		canonical          digest.Digest
	)

	if lw.resumableDigester != nil {
		// Restore the hasher state to the end of the upload.
		if err := lw.resumeHashAt(lw.size); err != nil {
			return "", err
		}

		canonical = lw.resumableDigester.Digest()

		if canonical.Algorithm() == dgst.Algorithm() {
			// Common case: client and server prefer the same canonical digest
			// algorithm - currently SHA256.
			verified = dgst == canonical
		} else {
			// The client wants to use a different digest algorithm. They'll just
			// have to be patient and wait for us to download and re-hash the
			// uploaded content using that digest algorithm.
			fullHash = true
		}
	} else {
		// Not using resumable digests, so we need to hash the entire layer.
		fullHash = true
	}

	if fullHash {
		digester := digest.NewCanonicalDigester()

		digestVerifier, err := digest.NewDigestVerifier(dgst)
		if err != nil {
			return "", err
		}

		// Read the file from the backend driver and validate it.
		fr, err := newFileReader(lw.bufferedFileWriter.driver, lw.path)
		if err != nil {
			return "", err
		}

		tr := io.TeeReader(fr, digester)

		if _, err = io.Copy(digestVerifier, tr); err != nil {
			return "", err
		}

		canonical = digester.Digest()
		verified = digestVerifier.Verified()
	}

	if !verified {
		ctxu.GetLoggerWithField(lw.layerStore.repository.ctx, "canonical", dgst).
			Errorf("canonical digest does not match provided digest")
		return "", distribution.ErrLayerInvalidDigest{
			Digest: dgst,
			Reason: fmt.Errorf("content does not match digest"),
		}
	}

	return canonical, nil
}

// moveLayer moves the data into its final, hash-qualified destination,
// identified by dgst. The layer should be validated before commencing the
// move.
func (lw *layerWriter) moveLayer(dgst digest.Digest) error {
	blobPath, err := lw.layerStore.repository.pm.path(blobDataPathSpec{
		digest: dgst,
	})
	if err != nil {
		return err
	}

	// Check for existence
	if _, err := lw.driver.Stat(blobPath); err != nil {
		switch err := err.(type) {
		case storagedriver.PathNotFoundError:
			break // ensure that it doesn't exist.
		default:
			return err
		}
	} else {
		// If the path exists, we can assume that the content has already
		// been uploaded, since the blob storage is content-addressable.
		// While it may be corrupted, detection of such corruption belongs
		// elsewhere.
		return nil
	}

	// If no data was received, we may not actually have a file on disk. Check
	// the size here and write a zero-length file to blobPath if this is the
	// case. For the most part, this should only ever happen with zero-length
	// tars.
	if _, err := lw.driver.Stat(lw.path); err != nil {
		switch err := err.(type) {
		case storagedriver.PathNotFoundError:
			// HACK(stevvooe): This is slightly dangerous: if we verify above,
			// get a hash, then the underlying file is deleted, we risk moving
			// a zero-length blob into a nonzero-length blob location. To
			// prevent this horrid thing, we employ the hack of only allowing
			// this to happen for the zero tarsum.
			if dgst == digest.DigestSha256EmptyTar {
				return lw.driver.PutContent(blobPath, []byte{})
			}

			// We let this fail during the move below.
			logrus.
				WithField("upload.uuid", lw.UUID()).
				WithField("digest", dgst).Warnf("attempted to move zero-length content with non-zero digest")
		default:
			return err // unrelated error
		}
	}

	return lw.driver.Move(lw.path, blobPath)
}
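// exampleVerifyContent is an illustrative sketch, not part of the original
// file: it isolates the full-hash path of validateLayer. The content is teed
// through a canonical digester while a verifier for the client-supplied
// digest consumes it, yielding both the canonical digest and a match result
// in a single read. The function name and parameters are hypothetical.
func exampleVerifyContent(content io.Reader, dgst digest.Digest) (digest.Digest, bool, error) {
	digester := digest.NewCanonicalDigester()
	verifier, err := digest.NewDigestVerifier(dgst)
	if err != nil {
		return "", false, err
	}
	// The verifier drains the stream; the tee updates the digester as a side
	// effect, so both results come from one pass over the content.
	if _, err := io.Copy(verifier, io.TeeReader(content, digester)); err != nil {
		return "", false, err
	}
	return digester.Digest(), verifier.Verified(), nil
}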
// linkLayer links a valid, written layer blob into the registry under the
// named repository for the upload controller.
func (lw *layerWriter) linkLayer(canonical digest.Digest, aliases ...digest.Digest) error {
	dgsts := append([]digest.Digest{canonical}, aliases...)

	// Don't make duplicate links.
	seenDigests := make(map[digest.Digest]struct{}, len(dgsts))

	for _, dgst := range dgsts {
		if _, seen := seenDigests[dgst]; seen {
			continue
		}
		seenDigests[dgst] = struct{}{}

		layerLinkPath, err := lw.layerStore.repository.pm.path(layerLinkPathSpec{
			name:   lw.layerStore.repository.Name(),
			digest: dgst,
		})
		if err != nil {
			return err
		}

		if err := lw.layerStore.repository.driver.PutContent(layerLinkPath, []byte(canonical)); err != nil {
			return err
		}
	}

	return nil
}

// removeResources should clean up all resources associated with the upload
// instance. An error will be returned if the cleanup cannot proceed. If the
// resources are already absent, no error will be returned.
func (lw *layerWriter) removeResources() error {
	dataPath, err := lw.layerStore.repository.pm.path(uploadDataPathSpec{
		name: lw.layerStore.repository.Name(),
		uuid: lw.uuid,
	})
	if err != nil {
		return err
	}

	// Resolve and delete the containing directory, which should include any
	// upload related files.
	dirPath := path.Dir(dataPath)

	if err := lw.driver.Delete(dirPath); err != nil {
		switch err := err.(type) {
		case storagedriver.PathNotFoundError:
			break // already gone!
		default:
			// This should be uncommon enough such that returning an error
			// should be okay. At this point, the upload should be mostly
			// complete, but perhaps the backend became inaccessible.
			logrus.Errorf("unable to delete layer upload resources %q: %v", dirPath, err)
			return err
		}
	}

	return nil
}
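// exampleResolveLink is an illustrative sketch, not part of the original
// file: linkLayer writes the canonical digest as the entire body of each
// link file, so resolving a repository-scoped link back to a blob digest is
// a single content read plus a parse. The function name and parameters are
// hypothetical, and digest.ParseDigest is an assumption about the digest
// package's API rather than something used in this file.
func exampleResolveLink(driver storagedriver.StorageDriver, linkPath string) (digest.Digest, error) {
	content, err := driver.GetContent(linkPath)
	if err != nil {
		return "", err
	}
	// The link body is the canonical digest string, e.g. "sha256:<hex>".
	return digest.ParseDigest(string(content))
}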