github.com/uber/kraken@v0.1.4/lib/blobrefresh/refresher.go (about)

     1  // Copyright (c) 2016-2019 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  package blobrefresh
    15  
    16  import (
    17  	"errors"
    18  	"fmt"
    19  	"time"
    20  
    21  	"github.com/uber/kraken/core"
    22  	"github.com/uber/kraken/lib/backend"
    23  	"github.com/uber/kraken/lib/backend/backenderrors"
    24  	"github.com/uber/kraken/lib/metainfogen"
    25  	"github.com/uber/kraken/lib/store"
    26  	"github.com/uber/kraken/utils/dedup"
    27  	"github.com/uber/kraken/utils/log"
    28  
    29  	"github.com/andres-erbsen/clock"
    30  	"github.com/c2h5oh/datasize"
    31  	"github.com/uber-go/tally"
    32  )
    33  
    34  // Refresher errors.
    35  var (
    36  	ErrPending     = errors.New("download is pending")
    37  	ErrNotFound    = errors.New("blob not found")
    38  	ErrWorkersBusy = errors.New("no workers available")
    39  )
    40  
// PostHook runs after the blob has been downloaded within the context of the
// deduplicated request. Refresh calls each hook it is given, in order, once the
// download and metainfo generation have both succeeded; hooks are not called
// if either step returns an error.
type PostHook interface {
	// Run is called with the digest of the blob that was refreshed. It has no
	// return value, so a hook has no way to report an error back to Refresh.
	Run(d core.Digest)
}
    46  
// Refresher deduplicates blob downloads / metainfo generation. Refresher is not
// responsible for tracking whether blobs already exist on disk -- it only provides
// a method for downloading blobs in a deduplicated fashion.
type Refresher struct {
	config            Config                 // settings; SizeLimit (when > 0) caps the blob size Refresh accepts
	stats             tally.Scope            // metrics scope, tagged with module=blobrefresh by New
	requests          *dedup.RequestCache    // deduplicates downloads, keyed by "namespace:digest-hex"
	cas               *store.CAStore         // store that downloaded blobs are written into
	backends          *backend.Manager       // resolves the backend client for a namespace
	metaInfoGenerator *metainfogen.Generator // generates metainfo after a blob has been downloaded
}
    58  
    59  // New creates a new Refresher.
    60  func New(
    61  	config Config,
    62  	stats tally.Scope,
    63  	cas *store.CAStore,
    64  	backends *backend.Manager,
    65  	metaInfoGenerator *metainfogen.Generator) *Refresher {
    66  
    67  	stats = stats.Tagged(map[string]string{
    68  		"module": "blobrefresh",
    69  	})
    70  
    71  	requests := dedup.NewRequestCache(dedup.RequestCacheConfig{}, clock.New())
    72  	requests.SetNotFound(func(err error) bool { return err == backenderrors.ErrBlobNotFound })
    73  
    74  	return &Refresher{config, stats, requests, cas, backends, metaInfoGenerator}
    75  }
    76  
    77  // Refresh kicks off a background goroutine to download the blob for d from the
    78  // remote backend configured for namespace and generates metainfo for the blob.
    79  // Returns ErrPending if an existing download for the blob is already running.
    80  // Returns ErrNotFound if the blob is not found. Returns ErrWorkersBusy if no
    81  // goroutines are available to run the download.
    82  func (r *Refresher) Refresh(namespace string, d core.Digest, hooks ...PostHook) error {
    83  	client, err := r.backends.GetClient(namespace)
    84  	if err != nil {
    85  		return fmt.Errorf("backend manager: %s", err)
    86  	}
    87  
    88  	// Always check whether the blob is actually available and valid before
    89  	// returning a potential pending error. This ensures that the majority of
    90  	// errors are propogated quickly and syncronously.
    91  	info, err := client.Stat(namespace, d.Hex())
    92  	if err != nil {
    93  		if err == backenderrors.ErrBlobNotFound {
    94  			return ErrNotFound
    95  		}
    96  		return fmt.Errorf("stat: %s", err)
    97  	}
    98  	size := datasize.ByteSize(info.Size)
    99  	if r.config.SizeLimit > 0 && size > r.config.SizeLimit {
   100  		return fmt.Errorf("%s blob exceeds size limit of %s", size, r.config.SizeLimit)
   101  	}
   102  
   103  	id := namespace + ":" + d.Hex()
   104  	err = r.requests.Start(id, func() error {
   105  		start := time.Now()
   106  		if err := r.download(client, namespace, d); err != nil {
   107  			return err
   108  		}
   109  		t := time.Since(start)
   110  		r.stats.Timer("download_remote_blob").Record(t)
   111  		log.With(
   112  			"namespace", namespace,
   113  			"name", d.Hex(),
   114  			"download_time", t).Info("Downloaded remote blob")
   115  
   116  		if err := r.metaInfoGenerator.Generate(d); err != nil {
   117  			return fmt.Errorf("generate metainfo: %s", err)
   118  		}
   119  		r.stats.Counter("downloads").Inc(1)
   120  		for _, h := range hooks {
   121  			h.Run(d)
   122  		}
   123  		return nil
   124  	})
   125  	switch err {
   126  	case dedup.ErrRequestPending:
   127  		return ErrPending
   128  	case backenderrors.ErrBlobNotFound:
   129  		return ErrNotFound
   130  	case dedup.ErrWorkersBusy:
   131  		return ErrWorkersBusy
   132  	default:
   133  		return err
   134  	}
   135  }
   136  
   137  func (r *Refresher) download(client backend.Client, namespace string, d core.Digest) error {
   138  	name := d.Hex()
   139  	return r.cas.WriteCacheFile(name, func(w store.FileReadWriter) error {
   140  		return client.Download(namespace, name, w)
   141  	})
   142  }