github.com/tiagovtristao/plz@v13.4.0+incompatible/src/cache/rpc_cache.go

// +build !bootstrap

// RPC-based remote cache. Similar to HTTP but likely higher performance.

package cache

import (
    "bytes"
    "crypto/tls"
    "crypto/x509"
    "encoding/base64"
    "fmt"
    "io/ioutil"
    "os"
    "path"
    "runtime"
    "sync/atomic"
    "time"

    "golang.org/x/net/context"
    "google.golang.org/grpc"
    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/credentials"
    _ "google.golang.org/grpc/encoding/gzip" // Registers the gzip compressor at init
    "google.golang.org/grpc/grpclog"
    "google.golang.org/grpc/status"
    "gopkg.in/op/go-logging.v1"

    pb "github.com/thought-machine/please/src/cache/proto/rpc_cache"
    "github.com/thought-machine/please/src/cache/tools"
    "github.com/thought-machine/please/src/core"
    "github.com/thought-machine/please/src/fs"
)

const maxErrors = 5

// We use zeroKey in cases where we need to supply a hash but it actually doesn't matter.
var zeroKey = []byte{0, 0, 0, 0}

type rpcCache struct {
    client     pb.RpcCacheClient
    Writeable  bool
    Connected  bool
    Connecting bool
    OSName     string
    numErrors  int32
    timeout    time.Duration
    startTime  time.Time
    maxMsgSize int
    nodes      []cacheNode
    hostname   string
    compressor grpc.CallOption
}

type cacheNode struct {
    cache     *rpcCache
    hashStart uint32
    hashEnd   uint32
}

// Store stores all the given outputs of a target in the RPC cache.
func (cache *rpcCache) Store(target *core.BuildTarget, key []byte, files ...string) {
    if cache.isConnected() && cache.Writeable {
        log.Debug("Storing %s in RPC cache...", target.Label)
        artifacts := []*pb.Artifact{}
        totalSize := 0
        for _, out := range cacheArtifacts(target, files...) {
            artifacts2, size, err := cache.loadArtifacts(target, out)
            if err != nil {
                log.Warning("RPC cache failed to load artifact %s: %s", out, err)
                cache.error()
                return
            }
            totalSize += size
            artifacts = append(artifacts, artifacts2...)
        }
        if totalSize > cache.maxMsgSize {
            log.Info("Artifacts for %s exceed maximum message size of %d bytes", target.Label, cache.maxMsgSize)
            return
        }
        cache.sendArtifacts(target, key, artifacts)
    }
}

// StoreExtra stores a single additional file for a target in the RPC cache.
func (cache *rpcCache) StoreExtra(target *core.BuildTarget, key []byte, file string) {
    if cache.isConnected() && cache.Writeable {
        log.Debug("Storing %s : %s in RPC cache...", target.Label, file)
        artifacts, totalSize, err := cache.loadArtifacts(target, file)
        if err != nil {
            log.Warning("RPC cache failed to load artifact %s: %s", file, err)
            cache.error()
            return
        }
        if totalSize > cache.maxMsgSize {
            log.Info("Artifact %s for %s exceeds maximum message size of %d bytes", file, target.Label, cache.maxMsgSize)
            return
        }
        cache.sendArtifacts(target, key, artifacts)
    }
}

// loadArtifacts loads a single output of a target from disk and converts it into cache
// artifacts, returning them along with their approximate encoded size.
func (cache *rpcCache) loadArtifacts(target *core.BuildTarget, file string) ([]*pb.Artifact, int, error) {
    outDir := target.OutDir()
    root := path.Join(outDir, file)
    totalSize := 1000 // Allow a little space for encoding overhead.
    if info, err := os.Lstat(root); err == nil && (info.Mode()&os.ModeSymlink) != 0 {
        // Artifact is a symlink.
        dest, err := os.Readlink(root)
        if err != nil {
            return nil, totalSize, err
        }
        return []*pb.Artifact{{
            Package: target.Label.PackageName,
            Target:  target.Label.Name,
            File:    file,
            Symlink: dest,
        }}, totalSize, nil
    }
    artifacts := []*pb.Artifact{}
    err := fs.Walk(root, func(name string, isDir bool) error {
        if !isDir {
            content, err := ioutil.ReadFile(name)
            if err != nil {
                return err
            }
            artifacts = append(artifacts, &pb.Artifact{
                Package: target.Label.PackageName,
                Target:  target.Label.Name,
                File:    name[len(outDir)+1:],
                Body:    content,
            })
            totalSize += len(content)
        }
        return nil
    })
    return artifacts, totalSize, err
}

// sendArtifacts sends a store request for the given artifacts to the cache server.
func (cache *rpcCache) sendArtifacts(target *core.BuildTarget, key []byte, artifacts []*pb.Artifact) {
    req := pb.StoreRequest{
        Artifacts: artifacts,
        Hash:      key,
        Os:        runtime.GOOS,
        Arch:      runtime.GOARCH,
        Hostname:  cache.hostname,
    }
    ctx, cancel := context.WithTimeout(context.Background(), cache.timeout)
    defer cancel()
    cache.runRPC(key, func(cache *rpcCache) (bool, []*pb.Artifact) {
        _, err := cache.client.Store(ctx, &req, cache.compressor)
        if err != nil {
            log.Warning("Error communicating with RPC cache server: %s", err)
            cache.error()
        }
        return err != nil, nil
    })
}

// Retrieve fetches all of a target's outputs from the RPC cache, returning true on success.
func (cache *rpcCache) Retrieve(target *core.BuildTarget, key []byte) bool {
    if !cache.isConnected() {
        return false
    }
    req := pb.RetrieveRequest{Hash: key, Os: runtime.GOOS, Arch: runtime.GOARCH}
    for _, out := range cacheArtifacts(target) {
        artifact := pb.Artifact{Package: target.Label.PackageName, Target: target.Label.Name, File: out}
        req.Artifacts = append(req.Artifacts, &artifact)
    }
    // We can't tell from here if retrieval has been successful for a target with no outputs.
    // This is kind of weird but not actually disallowed, and we already have a test case for it,
    // so might as well try to get it right here.
    if len(req.Artifacts) == 0 {
        return false
    }
    return cache.retrieveArtifacts(target, &req, true)
}

// RetrieveExtra fetches a single additional file for a target from the RPC cache.
func (cache *rpcCache) RetrieveExtra(target *core.BuildTarget, key []byte, file string) bool {
    if !cache.isConnected() {
        return false
    }
    artifact := pb.Artifact{Package: target.Label.PackageName, Target: target.Label.Name, File: file}
    artifacts := []*pb.Artifact{&artifact}
    req := pb.RetrieveRequest{Hash: key, Os: runtime.GOOS, Arch: runtime.GOARCH, Artifacts: artifacts}
    return cache.retrieveArtifacts(target, &req, false)
}

// retrieveArtifacts performs a retrieval request and writes the returned artifacts into the
// target's output directory, optionally removing any existing outputs first.
func (cache *rpcCache) retrieveArtifacts(target *core.BuildTarget, req *pb.RetrieveRequest, remove bool) bool {
    ctx, cancel := context.WithTimeout(context.Background(), cache.timeout)
    defer cancel()
    success, artifacts := cache.runRPC(req.Hash, func(cache *rpcCache) (bool, []*pb.Artifact) {
        response, err := cache.client.Retrieve(ctx, req, cache.compressor)
        if err != nil {
            log.Warning("Failed to retrieve artifacts for %s: %s", target.Label, err)
            cache.error()
            return false, nil
        } else if !response.Success {
            // Quiet, this is almost certainly just a 'not found'
            log.Debug("Couldn't retrieve artifacts for %s [key %s] from RPC cache", target.Label, base64.RawURLEncoding.EncodeToString(req.Hash))
        }
        // This always counts as "success" in this context, i.e. do not bother retrying on the
        // alternate if we were told that the artifact is not there.
        return true, response.Artifacts
    })
    if !success {
        return false
    }
    // Remove any existing outputs first; this is important for cases where the output is a
    // directory, because we get back individual artifacts, and we need to make sure that
    // only the retrieved artifacts are present in the output.
    if remove {
        for _, out := range target.Outputs() {
            out := path.Join(target.OutDir(), out)
            if err := os.RemoveAll(out); err != nil {
                log.Error("Failed to remove artifact %s: %s", out, err)
                return false
            }
        }
    }
    for _, artifact := range artifacts {
        if !cache.writeFile(target, artifact.File, artifact.Body, artifact.Symlink) {
            return false
        }
    }
    // Sanity check: if we don't get anything back, assume it probably wasn't really a success.
    return len(artifacts) > 0
}

// writeFile writes a single retrieved artifact (or symlink) into the target's output directory.
func (cache *rpcCache) writeFile(target *core.BuildTarget, file string, body []byte, symlink string) bool {
    out := path.Join(target.OutDir(), file)
    if err := os.MkdirAll(path.Dir(out), core.DirPermissions); err != nil {
        log.Warning("Failed to create directory for artifacts: %s", err)
        return false
    }
    if symlink != "" {
        if err := os.Symlink(symlink, out); err != nil {
            log.Warning("RPC cache failed to create symlink %s", err)
            return false
        }
    } else if err := fs.WriteFile(bytes.NewReader(body), out, target.OutMode()); err != nil {
        log.Warning("RPC cache failed to write file %s", err)
        return false
    }
    log.Debug("Retrieved %s - %s from RPC cache", target.Label, file)
    return true
}

// Clean removes all cached artifacts for the given target from the RPC cache.
func (cache *rpcCache) Clean(target *core.BuildTarget) {
    if cache.isConnected() && cache.Writeable {
        req := pb.DeleteRequest{Os: runtime.GOOS, Arch: runtime.GOARCH}
        artifact := pb.Artifact{Package: target.Label.PackageName, Target: target.Label.Name}
        req.Artifacts = []*pb.Artifact{&artifact}
        cache.runRPC(zeroKey, func(cache *rpcCache) (bool, []*pb.Artifact) {
            response, err := cache.client.Delete(context.Background(), &req, cache.compressor)
            if err != nil || !response.Success {
                log.Errorf("Failed to remove %s from RPC cache", target.Label)
                return false, nil
            }
            return true, nil
        })
    }
}

// CleanAll removes everything from the RPC cache.
func (cache *rpcCache) CleanAll() {
    if !cache.isConnected() {
        log.Error("RPC cache is not connected, cannot clean")
    } else if !cache.Writeable {
        log.Error("RPC cache is not writable, will not clean")
    } else {
        log.Debug("Cleaning entire RPC cache")
        req := pb.DeleteRequest{Everything: true}
        cache.runRPC(zeroKey, func(cache *rpcCache) (bool, []*pb.Artifact) {
            if response, err := cache.client.Delete(context.Background(), &req, cache.compressor); err != nil || !response.Success {
                log.Errorf("Failed to clean RPC cache: %s", err)
                return false, nil
            }
            return true, nil
        })
    }
}

// Shutdown is a no-op for the RPC cache.
func (cache *rpcCache) Shutdown() {}

// connect establishes the connection to the RPC cache server and, if the server is clustered,
// to each of its nodes.
func (cache *rpcCache) connect(url string, config *core.Configuration, isSubnode bool) {
    // Change grpc to log using our implementation
    grpclog.SetLoggerV2(&grpcLogMabob{})
    log.Info("Connecting to RPC cache at %s", url)
    opts := []grpc.DialOption{
        grpc.WithTimeout(cache.timeout),
        grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(cache.maxMsgSize), grpc.MaxCallSendMsgSize(cache.maxMsgSize)),
    }
    if config.Cache.RPCPublicKey != "" || config.Cache.RPCCACert != "" || config.Cache.RPCSecure {
        auth, err := loadAuth(config.Cache.RPCCACert, config.Cache.RPCPublicKey, config.Cache.RPCPrivateKey)
        if err != nil {
            log.Warning("Failed to load RPC cache auth keys: %s", err)
            return
        }
        opts = append(opts, auth)
    } else {
        opts = append(opts, grpc.WithInsecure())
    }
    connection, err := grpc.Dial(url, opts...)
    if err != nil {
        cache.Connecting = false
        log.Warning("Failed to connect to RPC cache: %s", err)
        return
    }
    // Stash hostname for later
    if hostname, err := os.Hostname(); err == nil {
        cache.hostname = hostname
    }
    // Message the server to get its cluster topology.
    client := pb.NewRpcCacheClient(connection)
    ctx, cancel := context.WithTimeout(context.Background(), cache.timeout)
    defer cancel()
    resp, err := client.ListNodes(ctx, &pb.ListRequest{})
    // For compatibility with older servers, handle an error code of Unimplemented and treat
    // as an unclustered server (because of course they can't be clustered).
    if err != nil && status.Code(err) != codes.Unimplemented {
        cache.Connecting = false
        log.Warning("Failed to contact RPC cache: %s", err)
        return
    } else if isSubnode || resp == nil || len(resp.Nodes) == 0 {
        // Server is not clustered, just use this one directly.
        // Or we're one of the sub-nodes and we are meant to connect directly.
        cache.client = client
        cache.Connected = true
        cache.Connecting = false
        log.Info("RPC cache connected after %0.2fs", time.Since(cache.startTime).Seconds())
        return
    }
    // If we get here, we are connected and the cache is clustered.
    cache.nodes = make([]cacheNode, len(resp.Nodes))
    for i, n := range resp.Nodes {
        subCache, _ := newRPCCacheInternal(n.Address, config, true)
        cache.nodes[i] = cacheNode{
            cache:     subCache,
            hashStart: n.HashBegin,
            hashEnd:   n.HashEnd,
        }
    }
    // We are now connected, the children aren't necessarily yet but that won't matter.
    cache.Connected = true
    cache.Connecting = false
    log.Info("Top-level RPC cache connected after %0.2fs with %d known nodes", time.Since(cache.startTime).Seconds(), len(resp.Nodes))
}

// isConnected checks if the cache is connected. If it's still trying to connect it allows a
// very brief wait to give it a chance to come online.
func (cache *rpcCache) isConnected() bool {
    if cache.Connected {
        return true
    } else if !cache.Connecting {
        return false
    }
    ticker := time.NewTicker(10 * time.Millisecond)
    for i := 0; i < 5 && cache.Connecting; i++ {
        <-ticker.C
    }
    ticker.Stop()
    return cache.Connected
}

// runRPC runs one RPC for a cache, with optional fallback to a replica on RPC failure
// (but not if the RPC completes unsuccessfully).
func (cache *rpcCache) runRPC(hash []byte, f func(*rpcCache) (bool, []*pb.Artifact)) (bool, []*pb.Artifact) {
    if len(cache.nodes) == 0 {
        // No clustering, just call it directly.
        return f(cache)
    }
    try := func(hash uint32) (bool, []*pb.Artifact) {
        for _, n := range cache.nodes {
            if hash >= n.hashStart && hash < n.hashEnd {
                if !n.cache.isConnected() {
                    return false, nil
                }
                return f(n.cache)
            }
        }
        log.Warning("No RPC cache client available for %d", hash)
        return false, nil
    }
    h := tools.Hash(hash)
    success, artifacts := try(h)
    if success {
        return success, artifacts
    }
    log.Info("Initial replica failed for %d, will retry on the alternate", h)
    return try(tools.AlternateHash(hash))
}

// error increments the error counter on the cache, and disables it if it gets too high.
// Note that after this it won't reconnect; we could try that but it probably isn't worth it
// (it's unlikely to restart in time if it's got a nontrivial set of artifacts to scan) and
// the user has probably been pestered by enough messages already.
func (cache *rpcCache) error() {
    if atomic.AddInt32(&cache.numErrors, 1) >= maxErrors && cache.Connected {
        log.Warning("Disabling RPC cache, looks like the connection has been lost")
        cache.Connected = false
    }
}

// newRPCCache creates a new RPC cache from the given configuration.
// The connection is established asynchronously.
func newRPCCache(config *core.Configuration) (*rpcCache, error) {
    return newRPCCacheInternal(config.Cache.RPCURL.String(), config, false)
}

// newRPCCacheInternal creates a new RPC cache for the given URL; isSubnode indicates that
// it is one node of a cluster rather than the top-level cache.
func newRPCCacheInternal(url string, config *core.Configuration, isSubnode bool) (*rpcCache, error) {
    cache := &rpcCache{
        Writeable:  config.Cache.RPCWriteable,
        Connecting: true,
        timeout:    time.Duration(config.Cache.RPCTimeout),
        startTime:  time.Now(),
        maxMsgSize: int(config.Cache.RPCMaxMsgSize),
        compressor: grpc.UseCompressor("gzip"),
    }
    go cache.connect(url, config, isSubnode)
    return cache, nil
}

// grpcLogMabob is an implementation of grpc's logging interface using our backend.
type grpcLogMabob struct{}

func (g *grpcLogMabob) Info(args ...interface{})                    { log.Info("%s", args) }
func (g *grpcLogMabob) Infof(format string, args ...interface{})    { log.Info(format, args...) }
func (g *grpcLogMabob) Infoln(args ...interface{})                  { log.Info("%s", args) }
func (g *grpcLogMabob) Warning(args ...interface{})                 { log.Warning("%s", args) }
func (g *grpcLogMabob) Warningf(format string, args ...interface{}) { log.Warning(format, args...) }
func (g *grpcLogMabob) Warningln(args ...interface{})               { log.Warning("%s", args) }
func (g *grpcLogMabob) Error(args ...interface{})                   { log.Error("", args...) }
func (g *grpcLogMabob) Errorf(format string, args ...interface{})   { log.Errorf(format, args...) }
func (g *grpcLogMabob) Errorln(args ...interface{})                 { log.Error("", args...) }
func (g *grpcLogMabob) Fatal(args ...interface{})                   { log.Fatal(args...) }
func (g *grpcLogMabob) Fatalf(format string, args ...interface{})   { log.Fatalf(format, args...) }
func (g *grpcLogMabob) Fatalln(args ...interface{})                 { log.Fatal(args...) }
func (g *grpcLogMabob) V(l int) bool                                { return log.IsEnabledFor(logging.Level(l)) }

// loadAuth loads authentication credentials from a given pair of public / private key files.
func loadAuth(caCert, publicKey, privateKey string) (grpc.DialOption, error) {
    config := tls.Config{}
    if publicKey != "" {
        log.Debug("Loading client certificate from %s, key %s", publicKey, privateKey)
        cert, err := tls.LoadX509KeyPair(publicKey, privateKey)
        if err != nil {
            return nil, err
        }
        config.Certificates = []tls.Certificate{cert}
    }
    if caCert != "" {
        log.Debug("Reading CA cert file from %s", caCert)
        cert, err := ioutil.ReadFile(caCert)
        if err != nil {
            return nil, err
        }
        config.RootCAs = x509.NewCertPool()
        if !config.RootCAs.AppendCertsFromPEM(cert) {
            return nil, fmt.Errorf("Failed to add any PEM certificates from %s", caCert)
        }
    }
    return grpc.WithTransportCredentials(credentials.NewTLS(&config)), nil
}
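
// Illustrative sketch (not part of the original file): how the clustered routing in runRPC
// maps an artifact key onto a node's [hashStart, hashEnd) range, assuming tools.Hash and
// tools.AlternateHash each reduce the key to a uint32 ring position, as they are used above.
// The helper name exampleSelectNode is hypothetical and exists only for illustration.
func exampleSelectNode(nodes []cacheNode, key []byte) *rpcCache {
    // Check the primary ring position first; if no node's range covers it, fall back to the
    // alternate position that runRPC would otherwise retry on after a failed attempt.
    for _, h := range []uint32{tools.Hash(key), tools.AlternateHash(key)} {
        for _, n := range nodes {
            if h >= n.hashStart && h < n.hashEnd {
                return n.cache
            }
        }
    }
    return nil // No node covers this key; callers would treat this as a cache miss.
}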