github.com/tiagovtristao/plz@v13.4.0+incompatible/src/cache/rpc_cache.go

// +build !bootstrap

// RPC-based remote cache. Similar to HTTP but likely higher performance.

package cache

import (
	"bytes"
	"crypto/tls"
	"crypto/x509"
	"encoding/base64"
	"fmt"
	"io/ioutil"
	"os"
	"path"
	"runtime"
	"sync/atomic"
	"time"

	"golang.org/x/net/context"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/credentials"
	_ "google.golang.org/grpc/encoding/gzip" // Registers the gzip compressor at init
	"google.golang.org/grpc/grpclog"
	"google.golang.org/grpc/status"
	"gopkg.in/op/go-logging.v1"

	pb "github.com/thought-machine/please/src/cache/proto/rpc_cache"
	"github.com/thought-machine/please/src/cache/tools"
	"github.com/thought-machine/please/src/core"
	"github.com/thought-machine/please/src/fs"
)

const maxErrors = 5

// We use zeroKey in cases where we need to supply a hash but it actually doesn't matter.
var zeroKey = []byte{0, 0, 0, 0}

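// An rpcCache is a Please cache backed by a remote RPC cache server, which may
// optionally be clustered over several nodes.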
type rpcCache struct {
	client     pb.RpcCacheClient
	Writeable  bool
	Connected  bool
	Connecting bool
	OSName     string
	numErrors  int32
	timeout    time.Duration
	startTime  time.Time
	maxMsgSize int
	nodes      []cacheNode
	hostname   string
	compressor grpc.CallOption
}

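// A cacheNode is a single member of a clustered cache; it owns the half-open
// range of artifact hashes [hashStart, hashEnd).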
type cacheNode struct {
	cache     *rpcCache
	hashStart uint32
	hashEnd   uint32
}

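// Store stores a set of files for a target in the cache against the given key.
// It does nothing if the cache isn't connected & writeable, or if the artifacts
// would exceed the maximum configured message size.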
func (cache *rpcCache) Store(target *core.BuildTarget, key []byte, files ...string) {
	if cache.isConnected() && cache.Writeable {
		log.Debug("Storing %s in RPC cache...", target.Label)
		artifacts := []*pb.Artifact{}
		totalSize := 0
		for _, out := range cacheArtifacts(target, files...) {
			artifacts2, size, err := cache.loadArtifacts(target, out)
			if err != nil {
				log.Warning("RPC cache failed to load artifact %s: %s", out, err)
				cache.error()
				return
			}
			totalSize += size
			artifacts = append(artifacts, artifacts2...)
		}
		if totalSize > cache.maxMsgSize {
			log.Info("Artifacts for %s exceed maximum message size of %d bytes", target.Label, cache.maxMsgSize)
			return
		}
		cache.sendArtifacts(target, key, artifacts)
	}
}

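// StoreExtra stores a single additional file for a target in the cache against the given key.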
func (cache *rpcCache) StoreExtra(target *core.BuildTarget, key []byte, file string) {
	if cache.isConnected() && cache.Writeable {
		log.Debug("Storing %s : %s in RPC cache...", target.Label, file)
		artifacts, totalSize, err := cache.loadArtifacts(target, file)
		if err != nil {
			log.Warning("RPC cache failed to load artifact %s: %s", file, err)
			cache.error()
			return
		}
		if totalSize > cache.maxMsgSize {
			log.Info("Artifact %s for %s exceeds maximum message size of %d bytes", file, target.Label, cache.maxMsgSize)
			return
		}
		cache.sendArtifacts(target, key, artifacts)
	}
}

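// loadArtifacts reads one of a target's outputs from disk and converts it into cache
// artifacts; a directory output yields one artifact per file within it. It returns the
// artifacts along with their approximate total encoded size in bytes.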
func (cache *rpcCache) loadArtifacts(target *core.BuildTarget, file string) ([]*pb.Artifact, int, error) {
	outDir := target.OutDir()
	root := path.Join(outDir, file)
	totalSize := 1000 // Allow a little space for encoding overhead.
	if info, err := os.Lstat(root); err == nil && (info.Mode()&os.ModeSymlink) != 0 {
		// Artifact is a symlink.
		dest, err := os.Readlink(root)
		if err != nil {
			return nil, totalSize, err
		}
		return []*pb.Artifact{{
			Package: target.Label.PackageName,
			Target:  target.Label.Name,
			File:    file,
			Symlink: dest,
		}}, totalSize, nil
	}
	artifacts := []*pb.Artifact{}
	err := fs.Walk(root, func(name string, isDir bool) error {
		if !isDir {
			content, err := ioutil.ReadFile(name)
			if err != nil {
				return err
			}
			artifacts = append(artifacts, &pb.Artifact{
				Package: target.Label.PackageName,
				Target:  target.Label.Name,
				File:    name[len(outDir)+1:],
				Body:    content,
			})
			totalSize += len(content)
		}
		return nil
	})
	return artifacts, totalSize, err
}

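// sendArtifacts submits a set of artifacts for a target to the cache server in a single Store RPC.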
func (cache *rpcCache) sendArtifacts(target *core.BuildTarget, key []byte, artifacts []*pb.Artifact) {
	req := pb.StoreRequest{
		Artifacts: artifacts,
		Hash:      key,
		Os:        runtime.GOOS,
		Arch:      runtime.GOARCH,
		Hostname:  cache.hostname,
	}
	ctx, cancel := context.WithTimeout(context.Background(), cache.timeout)
	defer cancel()
	cache.runRPC(key, func(cache *rpcCache) (bool, []*pb.Artifact) {
		_, err := cache.client.Store(ctx, &req, cache.compressor)
		if err != nil {
			log.Warning("Error communicating with RPC cache server: %s", err)
			cache.error()
		}
		return err != nil, nil
	})
}

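// Retrieve fetches all of a target's outputs from the cache under the given key and
// writes them into its output directory, returning true on success.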
func (cache *rpcCache) Retrieve(target *core.BuildTarget, key []byte) bool {
	if !cache.isConnected() {
		return false
	}
	req := pb.RetrieveRequest{Hash: key, Os: runtime.GOOS, Arch: runtime.GOARCH}
	for _, out := range cacheArtifacts(target) {
		artifact := pb.Artifact{Package: target.Label.PackageName, Target: target.Label.Name, File: out}
		req.Artifacts = append(req.Artifacts, &artifact)
	}
	// We can't tell from here if retrieval has been successful for a target with no outputs.
	// This is kind of weird but not actually disallowed, and we already have a test case for it,
	// so might as well try to get it right here.
	if len(req.Artifacts) == 0 {
		return false
	}
	return cache.retrieveArtifacts(target, &req, true)
}

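// RetrieveExtra fetches a single additional file for a target from the cache under the given key.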
func (cache *rpcCache) RetrieveExtra(target *core.BuildTarget, key []byte, file string) bool {
	if !cache.isConnected() {
		return false
	}
	artifact := pb.Artifact{Package: target.Label.PackageName, Target: target.Label.Name, File: file}
	artifacts := []*pb.Artifact{&artifact}
	req := pb.RetrieveRequest{Hash: key, Os: runtime.GOOS, Arch: runtime.GOARCH, Artifacts: artifacts}
	return cache.retrieveArtifacts(target, &req, false)
}

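// retrieveArtifacts runs a Retrieve RPC and writes any returned artifacts to disk.
// If remove is true it deletes the target's existing outputs first, so that only the
// retrieved artifacts remain afterwards.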
func (cache *rpcCache) retrieveArtifacts(target *core.BuildTarget, req *pb.RetrieveRequest, remove bool) bool {
	ctx, cancel := context.WithTimeout(context.Background(), cache.timeout)
	defer cancel()
	success, artifacts := cache.runRPC(req.Hash, func(cache *rpcCache) (bool, []*pb.Artifact) {
		response, err := cache.client.Retrieve(ctx, req, cache.compressor)
		if err != nil {
			log.Warning("Failed to retrieve artifacts for %s: %s", target.Label, err)
			cache.error()
			return false, nil
		} else if !response.Success {
			// Quiet, this is almost certainly just a 'not found'
			log.Debug("Couldn't retrieve artifacts for %s [key %s] from RPC cache", target.Label, base64.RawURLEncoding.EncodeToString(req.Hash))
		}
		// This always counts as "success" in this context, i.e. do not bother retrying on the
		// alternate if we were told that the artifact is not there.
		return true, response.Artifacts
	})
	if !success {
		return false
	}
	// Remove any existing outputs first; this is important for cases where the output is a
	// directory, because we get back individual artifacts, and we need to make sure that
	// only the retrieved artifacts are present in the output.
	if remove {
		for _, out := range target.Outputs() {
			out := path.Join(target.OutDir(), out)
			if err := os.RemoveAll(out); err != nil {
				log.Error("Failed to remove artifact %s: %s", out, err)
				return false
			}
		}
	}
	for _, artifact := range artifacts {
		if !cache.writeFile(target, artifact.File, artifact.Body, artifact.Symlink) {
			return false
		}
	}
	// Sanity check: if we don't get anything back, assume it probably wasn't really a success.
	return len(artifacts) > 0
}

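// writeFile writes a single retrieved artifact (either file contents or a symlink)
// into the target's output directory, returning true on success.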
func (cache *rpcCache) writeFile(target *core.BuildTarget, file string, body []byte, symlink string) bool {
	out := path.Join(target.OutDir(), file)
	if err := os.MkdirAll(path.Dir(out), core.DirPermissions); err != nil {
		log.Warning("Failed to create directory for artifacts: %s", err)
		return false
	}
	if symlink != "" {
		if err := os.Symlink(symlink, out); err != nil {
			log.Warning("RPC cache failed to create symlink %s", err)
			return false
		}
	} else if err := fs.WriteFile(bytes.NewReader(body), out, target.OutMode()); err != nil {
		log.Warning("RPC cache failed to write file %s", err)
		return false
	}
	log.Debug("Retrieved %s - %s from RPC cache", target.Label, file)
	return true
}

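// Clean removes any artifacts stored in the cache for the given target.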
func (cache *rpcCache) Clean(target *core.BuildTarget) {
	if cache.isConnected() && cache.Writeable {
		req := pb.DeleteRequest{Os: runtime.GOOS, Arch: runtime.GOARCH}
		artifact := pb.Artifact{Package: target.Label.PackageName, Target: target.Label.Name}
		req.Artifacts = []*pb.Artifact{&artifact}
		cache.runRPC(zeroKey, func(cache *rpcCache) (bool, []*pb.Artifact) {
			response, err := cache.client.Delete(context.Background(), &req, cache.compressor)
			if err != nil || !response.Success {
				log.Errorf("Failed to remove %s from RPC cache", target.Label)
				return false, nil
			}
			return true, nil
		})
	}
}

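// CleanAll wipes the entire contents of the cache.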
func (cache *rpcCache) CleanAll() {
	if !cache.isConnected() {
		log.Error("RPC cache is not connected, cannot clean")
	} else if !cache.Writeable {
		log.Error("RPC cache is not writable, will not clean")
	} else {
		log.Debug("Cleaning entire RPC cache")
		req := pb.DeleteRequest{Everything: true}
		cache.runRPC(zeroKey, func(cache *rpcCache) (bool, []*pb.Artifact) {
			if response, err := cache.client.Delete(context.Background(), &req, cache.compressor); err != nil || !response.Success {
				log.Errorf("Failed to clean RPC cache: %s", err)
				return false, nil
			}
			return true, nil
		})
	}
}

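// Shutdown is a no-op for this cache; there is nothing to tear down on exit.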
func (cache *rpcCache) Shutdown() {}

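// connect dials the RPC cache server at the given URL and asks it for its cluster topology.
// If the server is clustered a sub-cache is created for each node it reports; otherwise
// (or if this connection is itself to a sub-node) the single connection is used directly.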
func (cache *rpcCache) connect(url string, config *core.Configuration, isSubnode bool) {
	// Change grpc to log using our implementation
	grpclog.SetLoggerV2(&grpcLogMabob{})
	log.Info("Connecting to RPC cache at %s", url)
	opts := []grpc.DialOption{
		grpc.WithTimeout(cache.timeout),
		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(cache.maxMsgSize), grpc.MaxCallSendMsgSize(cache.maxMsgSize)),
	}
	if config.Cache.RPCPublicKey != "" || config.Cache.RPCCACert != "" || config.Cache.RPCSecure {
		auth, err := loadAuth(config.Cache.RPCCACert, config.Cache.RPCPublicKey, config.Cache.RPCPrivateKey)
		if err != nil {
			log.Warning("Failed to load RPC cache auth keys: %s", err)
			return
		}
		opts = append(opts, auth)
	} else {
		opts = append(opts, grpc.WithInsecure())
	}
	connection, err := grpc.Dial(url, opts...)
	if err != nil {
		cache.Connecting = false
		log.Warning("Failed to connect to RPC cache: %s", err)
		return
	}
	// Stash hostname for later
	if hostname, err := os.Hostname(); err == nil {
		cache.hostname = hostname
	}
	// Message the server to get its cluster topology.
	client := pb.NewRpcCacheClient(connection)
	ctx, cancel := context.WithTimeout(context.Background(), cache.timeout)
	defer cancel()
	resp, err := client.ListNodes(ctx, &pb.ListRequest{})
	// For compatibility with older servers, handle an error code of Unimplemented and treat
	// it as an unclustered server (because of course they can't be clustered).
	if err != nil && status.Code(err) != codes.Unimplemented {
		cache.Connecting = false
		log.Warning("Failed to contact RPC cache: %s", err)
		return
	} else if isSubnode || resp == nil || len(resp.Nodes) == 0 {
		// Server is not clustered, just use this one directly.
		// Or we're one of the sub-nodes and we are meant to connect directly.
		cache.client = client
		cache.Connected = true
		cache.Connecting = false
		log.Info("RPC cache connected after %0.2fs", time.Since(cache.startTime).Seconds())
		return
	}
	// If we get here, we are connected and the cache is clustered.
	cache.nodes = make([]cacheNode, len(resp.Nodes))
	for i, n := range resp.Nodes {
		subCache, _ := newRPCCacheInternal(n.Address, config, true)
		cache.nodes[i] = cacheNode{
			cache:     subCache,
			hashStart: n.HashBegin,
			hashEnd:   n.HashEnd,
		}
	}
	// We are now connected; the children aren't necessarily yet but that won't matter.
	cache.Connected = true
	cache.Connecting = false
	log.Info("Top-level RPC cache connected after %0.2fs with %d known nodes", time.Since(cache.startTime).Seconds(), len(resp.Nodes))
}

// isConnected checks if the cache is connected. If it's still trying to connect it allows a
// very brief wait to give it a chance to come online.
func (cache *rpcCache) isConnected() bool {
	if cache.Connected {
		return true
	} else if !cache.Connecting {
		return false
	}
	ticker := time.NewTicker(10 * time.Millisecond)
	for i := 0; i < 5 && cache.Connecting; i++ {
		<-ticker.C
	}
	ticker.Stop()
	return cache.Connected
}

// runRPC runs one RPC for a cache, with optional fallback to a replica on RPC failure
// (but not if the RPC completes unsuccessfully).
func (cache *rpcCache) runRPC(hash []byte, f func(*rpcCache) (bool, []*pb.Artifact)) (bool, []*pb.Artifact) {
	if len(cache.nodes) == 0 {
		// No clustering, just call it directly.
		return f(cache)
	}
	try := func(hash uint32) (bool, []*pb.Artifact) {
		for _, n := range cache.nodes {
			if hash >= n.hashStart && hash < n.hashEnd {
				if !n.cache.isConnected() {
					return false, nil
				}
				return f(n.cache)
			}
		}
		log.Warning("No RPC cache client available for %d", hash)
		return false, nil
	}
	h := tools.Hash(hash)
	success, artifacts := try(h)
	if success {
		return success, artifacts
	}
	log.Info("Initial replica failed for %d, will retry on the alternate", h)
	return try(tools.AlternateHash(hash))
}

// error increments the error counter on the cache, and disables it if it gets too high.
// Note that after this it won't reconnect; we could try that but it probably isn't worth it
// (it's unlikely to restart in time if it's got a nontrivial set of artifacts to scan) and
// the user has probably been pestered by enough messages already.
func (cache *rpcCache) error() {
	if atomic.AddInt32(&cache.numErrors, 1) >= maxErrors && cache.Connected {
		log.Warning("Disabling RPC cache, looks like the connection has been lost")
		cache.Connected = false
	}
}

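// newRPCCache creates a new RPC cache pointed at the URL from the configuration.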
func newRPCCache(config *core.Configuration) (*rpcCache, error) {
	return newRPCCacheInternal(config.Cache.RPCURL.String(), config, false)
}

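// newRPCCacheInternal creates an RPC cache for the given URL and starts connecting to it
// in the background. isSubnode indicates that this connection is to a single node of a
// cluster, which is then used directly regardless of any topology it reports.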
func newRPCCacheInternal(url string, config *core.Configuration, isSubnode bool) (*rpcCache, error) {
	cache := &rpcCache{
		Writeable:  config.Cache.RPCWriteable,
		Connecting: true,
		timeout:    time.Duration(config.Cache.RPCTimeout),
		startTime:  time.Now(),
		maxMsgSize: int(config.Cache.RPCMaxMsgSize),
		compressor: grpc.UseCompressor("gzip"),
	}
	go cache.connect(url, config, isSubnode)
	return cache, nil
}

// grpcLogMabob is an implementation of grpc's logging interface using our backend.
type grpcLogMabob struct{}

func (g *grpcLogMabob) Info(args ...interface{})                    { log.Info("%s", args) }
func (g *grpcLogMabob) Infof(format string, args ...interface{})    { log.Info(format, args...) }
func (g *grpcLogMabob) Infoln(args ...interface{})                  { log.Info("%s", args) }
func (g *grpcLogMabob) Warning(args ...interface{})                 { log.Warning("%s", args) }
func (g *grpcLogMabob) Warningf(format string, args ...interface{}) { log.Warning(format, args...) }
func (g *grpcLogMabob) Warningln(args ...interface{})               { log.Warning("%s", args) }
func (g *grpcLogMabob) Error(args ...interface{})                   { log.Error("%s", args) }
func (g *grpcLogMabob) Errorf(format string, args ...interface{})   { log.Errorf(format, args...) }
func (g *grpcLogMabob) Errorln(args ...interface{})                 { log.Error("%s", args) }
func (g *grpcLogMabob) Fatal(args ...interface{})                   { log.Fatal(args...) }
func (g *grpcLogMabob) Fatalf(format string, args ...interface{})   { log.Fatalf(format, args...) }
func (g *grpcLogMabob) Fatalln(args ...interface{})                 { log.Fatal(args...) }
func (g *grpcLogMabob) V(l int) bool                                { return log.IsEnabledFor(logging.Level(l)) }

// loadAuth loads authentication credentials from a given pair of public / private key files.
func loadAuth(caCert, publicKey, privateKey string) (grpc.DialOption, error) {
	config := tls.Config{}
	if publicKey != "" {
		log.Debug("Loading client certificate from %s, key %s", publicKey, privateKey)
		cert, err := tls.LoadX509KeyPair(publicKey, privateKey)
		if err != nil {
			return nil, err
		}
		config.Certificates = []tls.Certificate{cert}
	}
	if caCert != "" {
		log.Debug("Reading CA cert file from %s", caCert)
		cert, err := ioutil.ReadFile(caCert)
		if err != nil {
			return nil, err
		}
		config.RootCAs = x509.NewCertPool()
		if !config.RootCAs.AppendCertsFromPEM(cert) {
			return nil, fmt.Errorf("Failed to add any PEM certificates from %s", caCert)
		}
	}
	return grpc.WithTransportCredentials(credentials.NewTLS(&config)), nil
}