github.com/uber/kraken@v0.1.4/origin/blobclient/cluster_client.go (about)

     1  // Copyright (c) 2016-2019 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  package blobclient
    15  
    16  import (
    17  	"errors"
    18  	"fmt"
    19  	"io"
    20  	"math/rand"
    21  	"net/http"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/cenkalti/backoff"
    26  
    27  	"github.com/uber/kraken/core"
    28  	"github.com/uber/kraken/lib/hostlist"
    29  	"github.com/uber/kraken/utils/errutil"
    30  	"github.com/uber/kraken/utils/httputil"
    31  	"github.com/uber/kraken/utils/log"
    32  )
    33  
    34  // Locations queries cluster for the locations of d.
    35  func Locations(p Provider, cluster hostlist.List, d core.Digest) (locs []string, err error) {
    36  	addrs := cluster.Resolve().Sample(3)
    37  	if len(addrs) == 0 {
    38  		return nil, errors.New("cluster is empty")
    39  	}
    40  	for addr := range addrs {
    41  		locs, err = p.Provide(addr).Locations(d)
    42  		if err != nil {
    43  			continue
    44  		}
    45  		break
    46  	}
    47  	return locs, err
    48  }
    49  
// ClientResolver resolves digests into Clients of origins.
type ClientResolver interface {
	// Resolve must return an ordered, stable list of Clients for origins owning d.
	// Callers in this file walk the list front to back, so the order decides
	// which origin is tried first.
	Resolve(d core.Digest) ([]Client, error)
}
    55  
// clientResolver is the ClientResolver implementation returned by
// NewClientResolver. It looks up locations via Locations and turns each
// location into a Client.
type clientResolver struct {
	provider Provider      // Builds a Client for a given address.
	cluster  hostlist.List // Hosts queried for the locations of a digest.
}
    60  
    61  // NewClientResolver returns a new client resolver.
    62  func NewClientResolver(p Provider, cluster hostlist.List) ClientResolver {
    63  	return &clientResolver{p, cluster}
    64  }
    65  
    66  func (r *clientResolver) Resolve(d core.Digest) ([]Client, error) {
    67  	locs, err := Locations(r.provider, r.cluster, d)
    68  	if err != nil {
    69  		return nil, err
    70  	}
    71  	var clients []Client
    72  	for _, loc := range locs {
    73  		clients = append(clients, r.provider.Provide(loc))
    74  	}
    75  	return clients, nil
    76  }
    77  
// ClusterClient defines a top-level origin cluster client which handles blob
// location resolution and retries.
type ClusterClient interface {
	// UploadBlob uploads blob, identified by d, to the origin cluster.
	UploadBlob(namespace string, d core.Digest, blob io.Reader) error
	// DownloadBlob writes the blob identified by d into dst.
	DownloadBlob(namespace string, d core.Digest, dst io.Writer) error
	// GetMetaInfo returns the metainfo for d.
	GetMetaInfo(namespace string, d core.Digest) (*core.MetaInfo, error)
	// Stat checks availability of the blob identified by d in the cluster.
	Stat(namespace string, d core.Digest) (*core.BlobInfo, error)
	// OverwriteMetaInfo replaces the metainfo for d with metainfo configured
	// with pieceLength.
	OverwriteMetaInfo(d core.Digest, pieceLength int64) error
	// Owners returns the origin peers which own d.
	Owners(d core.Digest) ([]core.PeerContext, error)
	// ReplicateToRemote replicates d to the remote origin cluster at remoteDNS.
	ReplicateToRemote(namespace string, d core.Digest, remoteDNS string) error
}
    89  
// clusterClient is the ClusterClient implementation returned by
// NewClusterClient. Every operation first resolves the origins owning a
// digest through resolver.
type clusterClient struct {
	resolver ClientResolver
}
    93  
    94  // NewClusterClient returns a new ClusterClient.
    95  func NewClusterClient(r ClientResolver) ClusterClient {
    96  	return &clusterClient{r}
    97  }
    98  
    99  // defaultPollBackOff returns the default backoff used on Poll operations.
   100  func (c *clusterClient) defaultPollBackOff() backoff.BackOff {
   101  	return &backoff.ExponentialBackOff{
   102  		InitialInterval:     time.Second,
   103  		RandomizationFactor: 0.05,
   104  		Multiplier:          1.3,
   105  		MaxInterval:         5 * time.Second,
   106  		MaxElapsedTime:      15 * time.Minute,
   107  		Clock:               backoff.SystemClock,
   108  	}
   109  }
   110  
   111  // UploadBlob uploads blob to origin cluster. See Client.UploadBlob for more details.
   112  func (c *clusterClient) UploadBlob(namespace string, d core.Digest, blob io.Reader) (err error) {
   113  	clients, err := c.resolver.Resolve(d)
   114  	if err != nil {
   115  		return fmt.Errorf("resolve clients: %s", err)
   116  	}
   117  
   118  	// We prefer the origin with highest hashing score so the first origin will handle
   119  	// replication to origins with lower score. This is because we want to reduce upload
   120  	// conflicts between local replicas.
   121  	for _, client := range clients {
   122  		err = client.UploadBlob(namespace, d, blob)
   123  		if httputil.IsNetworkError(err) {
   124  			continue
   125  		}
   126  		break
   127  	}
   128  	return err
   129  }
   130  
   131  // GetMetaInfo returns the metainfo for d. Does not handle polling.
   132  func (c *clusterClient) GetMetaInfo(namespace string, d core.Digest) (mi *core.MetaInfo, err error) {
   133  	clients, err := c.resolver.Resolve(d)
   134  	if err != nil {
   135  		return nil, fmt.Errorf("resolve clients: %s", err)
   136  	}
   137  	for _, client := range clients {
   138  		mi, err = client.GetMetaInfo(namespace, d)
   139  		// Do not try the next replica on 202 errors.
   140  		if err != nil && !httputil.IsAccepted(err) {
   141  			continue
   142  		}
   143  		break
   144  	}
   145  	return mi, err
   146  }
   147  
   148  // Stat checks availability of a blob in the cluster.
   149  func (c *clusterClient) Stat(namespace string, d core.Digest) (bi *core.BlobInfo, err error) {
   150  	clients, err := c.resolver.Resolve(d)
   151  	if err != nil {
   152  		return nil, fmt.Errorf("resolve clients: %s", err)
   153  	}
   154  
   155  	shuffle(clients)
   156  	for _, client := range clients {
   157  		bi, err = client.Stat(namespace, d)
   158  		if err != nil {
   159  			continue
   160  		}
   161  		break
   162  	}
   163  
   164  	return bi, err
   165  }
   166  
   167  // OverwriteMetaInfo overwrites existing metainfo for d with new metainfo configured
   168  // with pieceLength on every origin server. Returns error if any origin was unable
   169  // to overwrite metainfo. Primarly intended for benchmarking purposes.
   170  func (c *clusterClient) OverwriteMetaInfo(d core.Digest, pieceLength int64) error {
   171  	clients, err := c.resolver.Resolve(d)
   172  	if err != nil {
   173  		return fmt.Errorf("resolve clients: %s", err)
   174  	}
   175  	var errs []error
   176  	for _, client := range clients {
   177  		if err := client.OverwriteMetaInfo(d, pieceLength); err != nil {
   178  			errs = append(errs, fmt.Errorf("origin %s: %s", client.Addr(), err))
   179  		}
   180  	}
   181  	return errutil.Join(errs)
   182  }
   183  
   184  // DownloadBlob pulls a blob from the origin cluster.
   185  func (c *clusterClient) DownloadBlob(namespace string, d core.Digest, dst io.Writer) error {
   186  	err := Poll(c.resolver, c.defaultPollBackOff(), d, func(client Client) error {
   187  		return client.DownloadBlob(namespace, d, dst)
   188  	})
   189  	if httputil.IsNotFound(err) {
   190  		err = ErrBlobNotFound
   191  	}
   192  	return err
   193  }
   194  
   195  // Owners returns the origin peers which own d.
   196  func (c *clusterClient) Owners(d core.Digest) ([]core.PeerContext, error) {
   197  	clients, err := c.resolver.Resolve(d)
   198  	if err != nil {
   199  		return nil, fmt.Errorf("resolve clients: %s", err)
   200  	}
   201  
   202  	var mu sync.Mutex
   203  	var peers []core.PeerContext
   204  	var errs []error
   205  
   206  	var wg sync.WaitGroup
   207  	for _, client := range clients {
   208  		wg.Add(1)
   209  		go func(client Client) {
   210  			defer wg.Done()
   211  			pctx, err := client.GetPeerContext()
   212  			mu.Lock()
   213  			if err != nil {
   214  				errs = append(errs, err)
   215  			} else {
   216  				peers = append(peers, pctx)
   217  			}
   218  			mu.Unlock()
   219  		}(client)
   220  	}
   221  	wg.Wait()
   222  
   223  	err = errutil.Join(errs)
   224  
   225  	if len(peers) == 0 {
   226  		if err != nil {
   227  			return nil, err
   228  		}
   229  		return nil, errors.New("no origin peers found")
   230  	}
   231  
   232  	if err != nil {
   233  		log.With("blob", d.Hex()).Errorf("Error getting all origin peers: %s", err)
   234  	}
   235  	return peers, nil
   236  }
   237  
   238  // ReplicateToRemote replicates d to a remote origin cluster.
   239  func (c *clusterClient) ReplicateToRemote(namespace string, d core.Digest, remoteDNS string) error {
   240  	// Re-use download backoff since replicate may download blobs.
   241  	return Poll(c.resolver, c.defaultPollBackOff(), d, func(client Client) error {
   242  		return client.ReplicateToRemote(namespace, d, remoteDNS)
   243  	})
   244  }
   245  
   246  func shuffle(cs []Client) {
   247  	for i := range cs {
   248  		j := rand.Intn(i + 1)
   249  		cs[i], cs[j] = cs[j], cs[i]
   250  	}
   251  }
   252  
   253  // Poll wraps requests for endpoints which require polling, due to a blob
   254  // being asynchronously fetched from remote storage in the origin cluster.
   255  func Poll(
   256  	r ClientResolver, b backoff.BackOff, d core.Digest, makeRequest func(Client) error) error {
   257  
   258  	// By looping over clients in order, we will always prefer the same origin
   259  	// for making requests to loosely guarantee that only one origin needs to
   260  	// fetch the file from remote backend.
   261  	clients, err := r.Resolve(d)
   262  	if err != nil {
   263  		return fmt.Errorf("resolve clients: %s", err)
   264  	}
   265  	var errs []error
   266  ORIGINS:
   267  	for _, client := range clients {
   268  		b.Reset()
   269  	POLL:
   270  		for {
   271  			if err := makeRequest(client); err != nil {
   272  				if serr, ok := err.(httputil.StatusError); ok {
   273  					if serr.Status == http.StatusAccepted {
   274  						d := b.NextBackOff()
   275  						if d == backoff.Stop {
   276  							break POLL // Backoff timed out.
   277  						}
   278  						time.Sleep(d)
   279  						continue POLL
   280  					}
   281  					if serr.Status < 500 {
   282  						return err
   283  					}
   284  				}
   285  				errs = append(errs, fmt.Errorf("origin %s: %s", client.Addr(), err))
   286  				continue ORIGINS
   287  			}
   288  			return nil // Success!
   289  		}
   290  		errs = append(errs,
   291  			fmt.Errorf("origin %s: backoff timed out on 202 responses", client.Addr()))
   292  	}
   293  	return fmt.Errorf("all origins unavailable: %s", errutil.Join(errs))
   294  }