github.com/mika/distribution@v2.2.2-0.20160108133430-a75790e3d8e0+incompatible/registry/storage/blobwriter.go

package storage

import (
	"errors"
	"fmt"
	"io"
	"path"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/docker/distribution"
	"github.com/docker/distribution/context"
	"github.com/docker/distribution/digest"
	storagedriver "github.com/docker/distribution/registry/storage/driver"
)

var (
	errResumableDigestNotAvailable = errors.New("resumable digest not available")
)

// blobWriter is used to control the various aspects of resumable
// blob upload. It implements the distribution.BlobWriter interface.
type blobWriter struct {
	blobStore *linkedBlobStore

	id        string
	startedAt time.Time
	digester  digest.Digester
	written   int64 // track the contiguous write

	// implements io.WriteSeeker, io.ReaderFrom and io.Closer to satisfy
	// the BlobWriter interface
	bufferedFileWriter

	resumableDigestEnabled bool
}

var _ distribution.BlobWriter = &blobWriter{}

// ID returns the identifier for this upload.
func (bw *blobWriter) ID() string {
	return bw.id
}

// StartedAt returns the time at which this upload was started.
func (bw *blobWriter) StartedAt() time.Time {
	return bw.startedAt
}
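
// Illustrative sketch (not part of the original file): callers normally
// obtain a BlobWriter from a repository's BlobStore and drive it like an
// ordinary io.Writer before committing. The repo, content, and dgst values
// below are hypothetical:
//
//	bw, err := repo.Blobs(ctx).Create(ctx)
//	if err != nil { /* handle */ }
//	if _, err := io.Copy(bw, content); err != nil { /* handle */ }
//	desc, err := bw.Commit(ctx, distribution.Descriptor{Digest: dgst})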

// Commit marks the upload as completed, returning a valid descriptor. The
// final size and digest are checked against the first descriptor provided.
func (bw *blobWriter) Commit(ctx context.Context, desc distribution.Descriptor) (distribution.Descriptor, error) {
	context.GetLogger(ctx).Debug("(*blobWriter).Commit")

	if err := bw.bufferedFileWriter.Close(); err != nil {
		return distribution.Descriptor{}, err
	}

	canonical, err := bw.validateBlob(ctx, desc)
	if err != nil {
		return distribution.Descriptor{}, err
	}

	if err := bw.moveBlob(ctx, canonical); err != nil {
		return distribution.Descriptor{}, err
	}

	if err := bw.blobStore.linkBlob(ctx, canonical, desc.Digest); err != nil {
		return distribution.Descriptor{}, err
	}

	if err := bw.removeResources(ctx); err != nil {
		return distribution.Descriptor{}, err
	}

	err = bw.blobStore.blobAccessController.SetDescriptor(ctx, canonical.Digest, canonical)
	if err != nil {
		return distribution.Descriptor{}, err
	}

	return canonical, nil
}

// Cancel rolls back the blob upload process, releasing any resources
// associated with the writer and canceling the operation.
func (bw *blobWriter) Cancel(ctx context.Context) error {
	context.GetLogger(ctx).Debug("(*blobWriter).Cancel")
	if err := bw.removeResources(ctx); err != nil {
		return err
	}

	bw.Close()
	return nil
}

func (bw *blobWriter) Write(p []byte) (int, error) {
	// Ensure that the current write offset matches how many bytes have been
	// written to the digester. If not, we need to update the digest state to
	// match the current write position.
	if err := bw.resumeDigestAt(bw.blobStore.ctx, bw.offset); err != nil && err != errResumableDigestNotAvailable {
		return 0, err
	}

	n, err := io.MultiWriter(&bw.bufferedFileWriter, bw.digester.Hash()).Write(p)
	bw.written += int64(n)

	return n, err
}

func (bw *blobWriter) ReadFrom(r io.Reader) (n int64, err error) {
	// Ensure that the current write offset matches how many bytes have been
	// written to the digester. If not, we need to update the digest state to
	// match the current write position.
	if err := bw.resumeDigestAt(bw.blobStore.ctx, bw.offset); err != nil && err != errResumableDigestNotAvailable {
		return 0, err
	}

	nn, err := bw.bufferedFileWriter.ReadFrom(io.TeeReader(r, bw.digester.Hash()))
	bw.written += nn

	return nn, err
}
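
// Both Write and ReadFrom above tee incoming bytes into bw.digester so the
// canonical digest is computed incrementally instead of re-reading the blob
// at commit time. A minimal standalone sketch of the same pattern, assuming
// SHA-256 as the canonical algorithm and hypothetical src/dst values:
//
//	h := sha256.New()
//	n, err := io.Copy(dst, io.TeeReader(src, h)) // h sees every byte written to dst
//	sum := h.Sum(nil)                            // digest of the copied content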

func (bw *blobWriter) Close() error {
	if bw.err != nil {
		return bw.err
	}

	if err := bw.storeHashState(bw.blobStore.ctx); err != nil {
		return err
	}

	return bw.bufferedFileWriter.Close()
}

// validateBlob checks the data against the digest, returning an error if it
// does not match. The canonical descriptor is returned.
func (bw *blobWriter) validateBlob(ctx context.Context, desc distribution.Descriptor) (distribution.Descriptor, error) {
	var (
		verified, fullHash bool
		canonical          digest.Digest
	)

	if desc.Digest == "" {
		// if no digest is provided, we have nothing to validate
		// against. We don't really want to support this for the registry.
		return distribution.Descriptor{}, distribution.ErrBlobInvalidDigest{
			Reason: fmt.Errorf("cannot validate against empty digest"),
		}
	}

	// Stat the on-disk file
	if fi, err := bw.bufferedFileWriter.driver.Stat(ctx, bw.path); err != nil {
		switch err := err.(type) {
		case storagedriver.PathNotFoundError:
			// NOTE(stevvooe): We really don't care if the file is
			// not actually present for the reader. We now assume
			// that the desc length is zero.
			desc.Size = 0
		default:
			// Any other error we want propagated up the stack.
			return distribution.Descriptor{}, err
		}
	} else {
		if fi.IsDir() {
			return distribution.Descriptor{}, fmt.Errorf("unexpected directory at upload location %q", bw.path)
		}

		bw.size = fi.Size()
	}

	if desc.Size > 0 {
		if desc.Size != bw.size {
			return distribution.Descriptor{}, distribution.ErrBlobInvalidLength
		}
	} else {
		// If a zero or negative length was provided, we can assume the
		// caller doesn't know or care about the length.
		desc.Size = bw.size
	}

	// TODO(stevvooe): This section is very meandering. Needs to be broken
	// down to be a lot clearer.

	if err := bw.resumeDigestAt(ctx, bw.size); err == nil {
		canonical = bw.digester.Digest()

		if canonical.Algorithm() == desc.Digest.Algorithm() {
			// Common case: client and server prefer the same canonical digest
			// algorithm - currently SHA256.
			verified = desc.Digest == canonical
		} else {
			// The client wants to use a different digest algorithm. They'll just
			// have to be patient and wait for us to download and re-hash the
			// uploaded content using that digest algorithm.
			fullHash = true
		}
	} else if err == errResumableDigestNotAvailable {
		// Not using resumable digests, so we need to hash the entire layer.
		fullHash = true
	} else {
		return distribution.Descriptor{}, err
	}

	if fullHash {
		// a fantastic optimization: if the written data and the size are
		// the same, we don't need to read the data from the backend. This is
		// because we've written the entire file in the lifecycle of the
		// current instance.
		if bw.written == bw.size && digest.Canonical == desc.Digest.Algorithm() {
			canonical = bw.digester.Digest()
			verified = desc.Digest == canonical
		}

		// If the check based on size fails, we fall back to the slowest of
		// paths. We may be able to make the size-based check a stronger
		// guarantee, so this may be defensive.
		if !verified {
			digester := digest.Canonical.New()

			digestVerifier, err := digest.NewDigestVerifier(desc.Digest)
			if err != nil {
				return distribution.Descriptor{}, err
			}

			// Read the file from the backend driver and validate it.
			fr, err := newFileReader(ctx, bw.bufferedFileWriter.driver, bw.path, desc.Size)
			if err != nil {
				return distribution.Descriptor{}, err
			}
			defer fr.Close()

			tr := io.TeeReader(fr, digester.Hash())

			if _, err := io.Copy(digestVerifier, tr); err != nil {
				return distribution.Descriptor{}, err
			}

			canonical = digester.Digest()
			verified = digestVerifier.Verified()
		}
	}

	if !verified {
		context.GetLoggerWithFields(ctx,
			map[interface{}]interface{}{
				"canonical": canonical,
				"provided":  desc.Digest,
			}, "canonical", "provided").
			Errorf("canonical digest does not match provided digest")
		return distribution.Descriptor{}, distribution.ErrBlobInvalidDigest{
			Digest: desc.Digest,
			Reason: fmt.Errorf("content does not match digest"),
		}
	}

	// update desc with canonical hash
	desc.Digest = canonical

	if desc.MediaType == "" {
		desc.MediaType = "application/octet-stream"
	}

	return desc, nil
}
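
// The full-hash fallback above re-reads the blob from the backend and streams
// it through a digest verifier. A minimal sketch of that verification pattern
// in isolation, with a hypothetical reader r:
//
//	verifier, err := digest.NewDigestVerifier(desc.Digest)
//	if err != nil { /* handle */ }
//	if _, err := io.Copy(verifier, r); err != nil { /* handle */ }
//	ok := verifier.Verified() // true only if r's content matches desc.Digest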

// moveBlob moves the data into its final, hash-qualified destination,
// identified by dgst. The layer should be validated before commencing the
// move.
func (bw *blobWriter) moveBlob(ctx context.Context, desc distribution.Descriptor) error {
	blobPath, err := pathFor(blobDataPathSpec{
		digest: desc.Digest,
	})

	if err != nil {
		return err
	}

	// Check for existence
	if _, err := bw.blobStore.driver.Stat(ctx, blobPath); err != nil {
		switch err := err.(type) {
		case storagedriver.PathNotFoundError:
			break // does not exist yet; proceed with the move
		default:
			return err
		}
	} else {
		// If the path exists, we can assume that the content has already
		// been uploaded, since the blob storage is content-addressable.
		// While it may be corrupted, detection of such corruption belongs
		// elsewhere.
		return nil
	}

	// If no data was received, we may not actually have a file on disk. Check
	// the size here and write a zero-length file to blobPath if this is the
	// case. For the most part, this should only ever happen with zero-length
	// tars.
	if _, err := bw.blobStore.driver.Stat(ctx, bw.path); err != nil {
		switch err := err.(type) {
		case storagedriver.PathNotFoundError:
			// HACK(stevvooe): This is slightly dangerous: if we verify above,
			// get a hash, then the underlying file is deleted, we risk moving
			// a zero-length blob into a nonzero-length blob location. To
			// prevent this horrid thing, we employ the hack of only allowing
			// this to happen for the digest of an empty tar.
			if desc.Digest == digest.DigestSha256EmptyTar {
				return bw.blobStore.driver.PutContent(ctx, blobPath, []byte{})
			}

			// We let this fail during the move below.
			logrus.
				WithField("upload.id", bw.ID()).
				WithField("digest", desc.Digest).Warnf("attempted to move zero-length content with non-zero digest")
		default:
			return err // unrelated error
		}
	}

	// TODO(stevvooe): We should also write the mediatype when executing this move.

	return bw.blobStore.driver.Move(ctx, bw.path, blobPath)
}
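
// Note that because the blob store is content-addressable, pathFor maps equal
// digests to equal paths, so the existence check in moveBlob is safe: if the
// destination path already exists, the same bytes have already been uploaded
// and the move can be skipped entirely.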

// removeResources should clean up all resources associated with the upload
// instance. An error will be returned if the clean up cannot proceed. If the
// resources are already not present, no error will be returned.
func (bw *blobWriter) removeResources(ctx context.Context) error {
	dataPath, err := pathFor(uploadDataPathSpec{
		name: bw.blobStore.repository.Name(),
		id:   bw.id,
	})

	if err != nil {
		return err
	}

	// Resolve and delete the containing directory, which should include any
	// upload related files.
	dirPath := path.Dir(dataPath)
	if err := bw.blobStore.driver.Delete(ctx, dirPath); err != nil {
		switch err := err.(type) {
		case storagedriver.PathNotFoundError:
			break // already gone!
		default:
			// This should be uncommon enough such that returning an error
			// should be okay. At this point, the upload should be mostly
			// complete, but perhaps the backend became inaccessible.
			context.GetLogger(ctx).Errorf("unable to delete layer upload resources %q: %v", dirPath, err)
			return err
		}
	}

	return nil
}

// Reader returns a reader over the uploaded blob data, waiting briefly for
// the backend to report the file if it is not yet visible.
func (bw *blobWriter) Reader() (io.ReadCloser, error) {
	// todo(richardscothern): Change to exponential backoff, i=0.5, e=2, n=4
	try := 1
	for try <= 5 {
		_, err := bw.bufferedFileWriter.driver.Stat(bw.ctx, bw.path)
		if err == nil {
			break
		}
		switch err.(type) {
		case storagedriver.PathNotFoundError:
			context.GetLogger(bw.ctx).Debugf("Nothing found on try %d, sleeping...", try)
			time.Sleep(1 * time.Second)
			try++
		default:
			return nil, err
		}
	}

	readCloser, err := bw.bufferedFileWriter.driver.ReadStream(bw.ctx, bw.path, 0)
	if err != nil {
		return nil, err
	}

	return readCloser, nil
}
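
// The retry loop in Reader polls at a fixed one-second interval; the todo
// above asks for exponential backoff (initial 0.5s, factor 2, 4 attempts).
// A minimal sketch of that schedule, not wired into this file and using
// hypothetical driver/ctx/path values:
//
//	delay := 500 * time.Millisecond
//	for try := 0; try < 4; try++ {
//		if _, err := driver.Stat(ctx, path); err == nil {
//			break
//		}
//		time.Sleep(delay)
//		delay *= 2
//	}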