// Copyright (c) 2016-2019 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package blobrefresh

import (
	"errors"
	"fmt"
	"time"

	"github.com/uber/kraken/core"
	"github.com/uber/kraken/lib/backend"
	"github.com/uber/kraken/lib/backend/backenderrors"
	"github.com/uber/kraken/lib/metainfogen"
	"github.com/uber/kraken/lib/store"
	"github.com/uber/kraken/utils/dedup"
	"github.com/uber/kraken/utils/log"

	"github.com/andres-erbsen/clock"
	"github.com/c2h5oh/datasize"
	"github.com/uber-go/tally"
)

// Refresher errors. These are the sentinel values Refresh returns so that
// callers can distinguish the expected outcomes from unexpected failures.
var (
	// ErrPending is returned by Refresh when the request cache reports
	// dedup.ErrRequestPending for the namespace / digest pair.
	ErrPending = errors.New("download is pending")
	// ErrNotFound is returned by Refresh when the backend reports
	// backenderrors.ErrBlobNotFound for the blob.
	ErrNotFound = errors.New("blob not found")
	// ErrWorkersBusy is returned by Refresh when the request cache reports
	// dedup.ErrWorkersBusy.
	ErrWorkersBusy = errors.New("no workers available")
)

// PostHook runs after the blob has been downloaded within the context of the
// deduplicated request.
type PostHook interface {
	// Run is called with the digest of the refreshed blob once the download
	// and the metainfo generation have both succeeded.
	Run(d core.Digest)
}

// Refresher deduplicates blob downloads / metainfo generation. Refresher is not
// responsible for tracking whether blobs already exist on disk -- it only provides
// a method for downloading blobs in a deduplicated fashion.
type Refresher struct {
	// config supplies SizeLimit, which Refresh enforces when it is positive.
	config Config
	// stats receives the "download_remote_blob" timer and "downloads" counter.
	stats tally.Scope
	// requests deduplicates refreshes, keyed by "<namespace>:<digest hex>".
	requests *dedup.RequestCache
	// cas is the store the downloaded blob is written into as a cache file.
	cas *store.CAStore
	// backends resolves the backend client for a namespace.
	backends *backend.Manager
	// metaInfoGenerator generates metainfo for a blob after it is downloaded.
	metaInfoGenerator *metainfogen.Generator
}

// New creates a new Refresher.
60 func New( 61 config Config, 62 stats tally.Scope, 63 cas *store.CAStore, 64 backends *backend.Manager, 65 metaInfoGenerator *metainfogen.Generator) *Refresher { 66 67 stats = stats.Tagged(map[string]string{ 68 "module": "blobrefresh", 69 }) 70 71 requests := dedup.NewRequestCache(dedup.RequestCacheConfig{}, clock.New()) 72 requests.SetNotFound(func(err error) bool { return err == backenderrors.ErrBlobNotFound }) 73 74 return &Refresher{config, stats, requests, cas, backends, metaInfoGenerator} 75 } 76 77 // Refresh kicks off a background goroutine to download the blob for d from the 78 // remote backend configured for namespace and generates metainfo for the blob. 79 // Returns ErrPending if an existing download for the blob is already running. 80 // Returns ErrNotFound if the blob is not found. Returns ErrWorkersBusy if no 81 // goroutines are available to run the download. 82 func (r *Refresher) Refresh(namespace string, d core.Digest, hooks ...PostHook) error { 83 client, err := r.backends.GetClient(namespace) 84 if err != nil { 85 return fmt.Errorf("backend manager: %s", err) 86 } 87 88 // Always check whether the blob is actually available and valid before 89 // returning a potential pending error. This ensures that the majority of 90 // errors are propogated quickly and syncronously. 
91 info, err := client.Stat(namespace, d.Hex()) 92 if err != nil { 93 if err == backenderrors.ErrBlobNotFound { 94 return ErrNotFound 95 } 96 return fmt.Errorf("stat: %s", err) 97 } 98 size := datasize.ByteSize(info.Size) 99 if r.config.SizeLimit > 0 && size > r.config.SizeLimit { 100 return fmt.Errorf("%s blob exceeds size limit of %s", size, r.config.SizeLimit) 101 } 102 103 id := namespace + ":" + d.Hex() 104 err = r.requests.Start(id, func() error { 105 start := time.Now() 106 if err := r.download(client, namespace, d); err != nil { 107 return err 108 } 109 t := time.Since(start) 110 r.stats.Timer("download_remote_blob").Record(t) 111 log.With( 112 "namespace", namespace, 113 "name", d.Hex(), 114 "download_time", t).Info("Downloaded remote blob") 115 116 if err := r.metaInfoGenerator.Generate(d); err != nil { 117 return fmt.Errorf("generate metainfo: %s", err) 118 } 119 r.stats.Counter("downloads").Inc(1) 120 for _, h := range hooks { 121 h.Run(d) 122 } 123 return nil 124 }) 125 switch err { 126 case dedup.ErrRequestPending: 127 return ErrPending 128 case backenderrors.ErrBlobNotFound: 129 return ErrNotFound 130 case dedup.ErrWorkersBusy: 131 return ErrWorkersBusy 132 default: 133 return err 134 } 135 } 136 137 func (r *Refresher) download(client backend.Client, namespace string, d core.Digest) error { 138 name := d.Hex() 139 return r.cas.WriteCacheFile(name, func(w store.FileReadWriter) error { 140 return client.Download(namespace, name, w) 141 }) 142 }