github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/go/cache/prog.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package cache 6 7 import ( 8 "bufio" 9 "context" 10 "crypto/sha256" 11 "encoding/base64" 12 "encoding/json" 13 "errors" 14 "fmt" 15 "io" 16 "log" 17 "os" 18 "os/exec" 19 "sync" 20 "sync/atomic" 21 "time" 22 23 "github.com/go-asm/go/cmd/go/base" 24 "github.com/go-asm/go/cmd/quoted" 25 ) 26 27 // ProgCache implements Cache via JSON messages over stdin/stdout to a child 28 // helper process which can then implement whatever caching policy/mechanism it 29 // wants. 30 // 31 // See https://github.com/golang/go/issues/59719 32 type ProgCache struct { 33 cmd *exec.Cmd 34 stdout io.ReadCloser // from the child process 35 stdin io.WriteCloser // to the child process 36 bw *bufio.Writer // to stdin 37 jenc *json.Encoder // to bw 38 39 // can are the commands that the child process declared that it supports. 40 // This is effectively the versioning mechanism. 41 can map[ProgCmd]bool 42 43 // fuzzDirCache is another Cache implementation to use for the FuzzDir 44 // method. In practice this is the default GOCACHE disk-based 45 // implementation. 46 // 47 // TODO(bradfitz): maybe this isn't ideal. But we'd need to extend the Cache 48 // interface and the fuzzing callers to be less disk-y to do more here. 49 fuzzDirCache Cache 50 51 closing atomic.Bool 52 ctx context.Context // valid until Close via ctxClose 53 ctxCancel context.CancelFunc // called on Close 54 readLoopDone chan struct{} // closed when readLoop returns 55 56 mu sync.Mutex // guards following fields 57 nextID int64 58 inFlight map[int64]chan<- *ProgResponse 59 outputFile map[OutputID]string // object => abs path on disk 60 61 // writeMu serializes writing to the child process. 62 // It must never be held at the same time as mu. 63 writeMu sync.Mutex 64 } 65 66 // ProgCmd is a command that can be issued to a child process. 67 // 68 // If the interface needs to grow, we can add new commands or new versioned 69 // commands like "get2". 70 type ProgCmd string 71 72 const ( 73 cmdGet = ProgCmd("get") 74 cmdPut = ProgCmd("put") 75 cmdClose = ProgCmd("close") 76 ) 77 78 // ProgRequest is the JSON-encoded message that's sent from cmd/go to 79 // the GOCACHEPROG child process over stdin. Each JSON object is on its 80 // own line. A ProgRequest of Type "put" with BodySize > 0 will be followed 81 // by a line containing a base64-encoded JSON string literal of the body. 82 type ProgRequest struct { 83 // ID is a unique number per process across all requests. 84 // It must be echoed in the ProgResponse from the child. 85 ID int64 86 87 // Command is the type of request. 88 // The cmd/go tool will only send commands that were declared 89 // as supported by the child. 90 Command ProgCmd 91 92 // ActionID is non-nil for get and puts. 93 ActionID []byte `json:",omitempty"` // or nil if not used 94 95 // ObjectID is set for Type "put" and "output-file". 96 ObjectID []byte `json:",omitempty"` // or nil if not used 97 98 // Body is the body for "put" requests. It's sent after the JSON object 99 // as a base64-encoded JSON string when BodySize is non-zero. 100 // It's sent as a separate JSON value instead of being a struct field 101 // send in this JSON object so large values can be streamed in both directions. 102 // The base64 string body of a ProgRequest will always be written 103 // immediately after the JSON object and a newline. 104 Body io.Reader `json:"-"` 105 106 // BodySize is the number of bytes of Body. If zero, the body isn't written. 107 BodySize int64 `json:",omitempty"` 108 } 109 110 // ProgResponse is the JSON response from the child process to cmd/go. 111 // 112 // With the exception of the first protocol message that the child writes to its 113 // stdout with ID==0 and KnownCommands populated, these are only sent in 114 // response to a ProgRequest from cmd/go. 115 // 116 // ProgResponses can be sent in any order. The ID must match the request they're 117 // replying to. 118 type ProgResponse struct { 119 ID int64 // that corresponds to ProgRequest; they can be answered out of order 120 Err string `json:",omitempty"` // if non-empty, the error 121 122 // KnownCommands is included in the first message that cache helper program 123 // writes to stdout on startup (with ID==0). It includes the 124 // ProgRequest.Command types that are supported by the program. 125 // 126 // This lets us extend the protocol gracefully over time (adding "get2", 127 // etc), or fail gracefully when needed. It also lets us verify the program 128 // wants to be a cache helper. 129 KnownCommands []ProgCmd `json:",omitempty"` 130 131 // For Get requests. 132 133 Miss bool `json:",omitempty"` // cache miss 134 OutputID []byte `json:",omitempty"` 135 Size int64 `json:",omitempty"` // in bytes 136 Time *time.Time `json:",omitempty"` // an Entry.Time; when the object was added to the docs 137 138 // DiskPath is the absolute path on disk of the ObjectID corresponding 139 // a "get" request's ActionID (on cache hit) or a "put" request's 140 // provided ObjectID. 141 DiskPath string `json:",omitempty"` 142 } 143 144 // startCacheProg starts the prog binary (with optional space-separated flags) 145 // and returns a Cache implementation that talks to it. 146 // 147 // It blocks a few seconds to wait for the child process to successfully start 148 // and advertise its capabilities. 149 func startCacheProg(progAndArgs string, fuzzDirCache Cache) Cache { 150 if fuzzDirCache == nil { 151 panic("missing fuzzDirCache") 152 } 153 args, err := quoted.Split(progAndArgs) 154 if err != nil { 155 base.Fatalf("GOCACHEPROG args: %v", err) 156 } 157 var prog string 158 if len(args) > 0 { 159 prog = args[0] 160 args = args[1:] 161 } 162 163 ctx, ctxCancel := context.WithCancel(context.Background()) 164 165 cmd := exec.CommandContext(ctx, prog, args...) 166 out, err := cmd.StdoutPipe() 167 if err != nil { 168 base.Fatalf("StdoutPipe to GOCACHEPROG: %v", err) 169 } 170 in, err := cmd.StdinPipe() 171 if err != nil { 172 base.Fatalf("StdinPipe to GOCACHEPROG: %v", err) 173 } 174 cmd.Stderr = os.Stderr 175 cmd.Cancel = in.Close 176 177 if err := cmd.Start(); err != nil { 178 base.Fatalf("error starting GOCACHEPROG program %q: %v", prog, err) 179 } 180 181 pc := &ProgCache{ 182 ctx: ctx, 183 ctxCancel: ctxCancel, 184 fuzzDirCache: fuzzDirCache, 185 cmd: cmd, 186 stdout: out, 187 stdin: in, 188 bw: bufio.NewWriter(in), 189 inFlight: make(map[int64]chan<- *ProgResponse), 190 outputFile: make(map[OutputID]string), 191 readLoopDone: make(chan struct{}), 192 } 193 194 // Register our interest in the initial protocol message from the child to 195 // us, saying what it can do. 196 capResc := make(chan *ProgResponse, 1) 197 pc.inFlight[0] = capResc 198 199 pc.jenc = json.NewEncoder(pc.bw) 200 go pc.readLoop(pc.readLoopDone) 201 202 // Give the child process a few seconds to report its capabilities. This 203 // should be instant and not require any slow work by the program. 204 timer := time.NewTicker(5 * time.Second) 205 defer timer.Stop() 206 for { 207 select { 208 case <-timer.C: 209 log.Printf("# still waiting for GOCACHEPROG %v ...", prog) 210 case capRes := <-capResc: 211 can := map[ProgCmd]bool{} 212 for _, cmd := range capRes.KnownCommands { 213 can[cmd] = true 214 } 215 if len(can) == 0 { 216 base.Fatalf("GOCACHEPROG %v declared no supported commands", prog) 217 } 218 pc.can = can 219 return pc 220 } 221 } 222 } 223 224 func (c *ProgCache) readLoop(readLoopDone chan<- struct{}) { 225 defer close(readLoopDone) 226 jd := json.NewDecoder(c.stdout) 227 for { 228 res := new(ProgResponse) 229 if err := jd.Decode(res); err != nil { 230 if c.closing.Load() { 231 return // quietly 232 } 233 if err == io.EOF { 234 c.mu.Lock() 235 inFlight := len(c.inFlight) 236 c.mu.Unlock() 237 base.Fatalf("GOCACHEPROG exited pre-Close with %v pending requests", inFlight) 238 } 239 base.Fatalf("error reading JSON from GOCACHEPROG: %v", err) 240 } 241 c.mu.Lock() 242 ch, ok := c.inFlight[res.ID] 243 delete(c.inFlight, res.ID) 244 c.mu.Unlock() 245 if ok { 246 ch <- res 247 } else { 248 base.Fatalf("GOCACHEPROG sent response for unknown request ID %v", res.ID) 249 } 250 } 251 } 252 253 func (c *ProgCache) send(ctx context.Context, req *ProgRequest) (*ProgResponse, error) { 254 resc := make(chan *ProgResponse, 1) 255 if err := c.writeToChild(req, resc); err != nil { 256 return nil, err 257 } 258 select { 259 case res := <-resc: 260 if res.Err != "" { 261 return nil, errors.New(res.Err) 262 } 263 return res, nil 264 case <-ctx.Done(): 265 return nil, ctx.Err() 266 } 267 } 268 269 func (c *ProgCache) writeToChild(req *ProgRequest, resc chan<- *ProgResponse) (err error) { 270 c.mu.Lock() 271 c.nextID++ 272 req.ID = c.nextID 273 c.inFlight[req.ID] = resc 274 c.mu.Unlock() 275 276 defer func() { 277 if err != nil { 278 c.mu.Lock() 279 delete(c.inFlight, req.ID) 280 c.mu.Unlock() 281 } 282 }() 283 284 c.writeMu.Lock() 285 defer c.writeMu.Unlock() 286 287 if err := c.jenc.Encode(req); err != nil { 288 return err 289 } 290 if err := c.bw.WriteByte('\n'); err != nil { 291 return err 292 } 293 if req.Body != nil && req.BodySize > 0 { 294 if err := c.bw.WriteByte('"'); err != nil { 295 return err 296 } 297 e := base64.NewEncoder(base64.StdEncoding, c.bw) 298 wrote, err := io.Copy(e, req.Body) 299 if err != nil { 300 return err 301 } 302 if err := e.Close(); err != nil { 303 return nil 304 } 305 if wrote != req.BodySize { 306 return fmt.Errorf("short write writing body to GOCACHEPROG for action %x, object %x: wrote %v; expected %v", 307 req.ActionID, req.ObjectID, wrote, req.BodySize) 308 } 309 if _, err := c.bw.WriteString("\"\n"); err != nil { 310 return err 311 } 312 } 313 if err := c.bw.Flush(); err != nil { 314 return err 315 } 316 return nil 317 } 318 319 func (c *ProgCache) Get(a ActionID) (Entry, error) { 320 if !c.can[cmdGet] { 321 // They can't do a "get". Maybe they're a write-only cache. 322 // 323 // TODO(bradfitz,bcmills): figure out the proper error type here. Maybe 324 // errors.ErrUnsupported? Is entryNotFoundError even appropriate? There 325 // might be places where we rely on the fact that a recent Put can be 326 // read through a corresponding Get. Audit callers and check, and document 327 // error types on the Cache interface. 328 return Entry{}, &entryNotFoundError{} 329 } 330 res, err := c.send(c.ctx, &ProgRequest{ 331 Command: cmdGet, 332 ActionID: a[:], 333 }) 334 if err != nil { 335 return Entry{}, err // TODO(bradfitz): or entryNotFoundError? Audit callers. 336 } 337 if res.Miss { 338 return Entry{}, &entryNotFoundError{} 339 } 340 e := Entry{ 341 Size: res.Size, 342 } 343 if res.Time != nil { 344 e.Time = *res.Time 345 } else { 346 e.Time = time.Now() 347 } 348 if res.DiskPath == "" { 349 return Entry{}, &entryNotFoundError{errors.New("GOCACHEPROG didn't populate DiskPath on get hit")} 350 } 351 if copy(e.OutputID[:], res.OutputID) != len(res.OutputID) { 352 return Entry{}, &entryNotFoundError{errors.New("incomplete ProgResponse OutputID")} 353 } 354 c.noteOutputFile(e.OutputID, res.DiskPath) 355 return e, nil 356 } 357 358 func (c *ProgCache) noteOutputFile(o OutputID, diskPath string) { 359 c.mu.Lock() 360 defer c.mu.Unlock() 361 c.outputFile[o] = diskPath 362 } 363 364 func (c *ProgCache) OutputFile(o OutputID) string { 365 c.mu.Lock() 366 defer c.mu.Unlock() 367 return c.outputFile[o] 368 } 369 370 func (c *ProgCache) Put(a ActionID, file io.ReadSeeker) (_ OutputID, size int64, _ error) { 371 // Compute output ID. 372 h := sha256.New() 373 if _, err := file.Seek(0, 0); err != nil { 374 return OutputID{}, 0, err 375 } 376 size, err := io.Copy(h, file) 377 if err != nil { 378 return OutputID{}, 0, err 379 } 380 var out OutputID 381 h.Sum(out[:0]) 382 383 if _, err := file.Seek(0, 0); err != nil { 384 return OutputID{}, 0, err 385 } 386 387 if !c.can[cmdPut] { 388 // Child is a read-only cache. Do nothing. 389 return out, size, nil 390 } 391 392 res, err := c.send(c.ctx, &ProgRequest{ 393 Command: cmdPut, 394 ActionID: a[:], 395 ObjectID: out[:], 396 Body: file, 397 BodySize: size, 398 }) 399 if err != nil { 400 return OutputID{}, 0, err 401 } 402 if res.DiskPath == "" { 403 return OutputID{}, 0, errors.New("GOCACHEPROG didn't return DiskPath in put response") 404 } 405 c.noteOutputFile(out, res.DiskPath) 406 return out, size, err 407 } 408 409 func (c *ProgCache) Close() error { 410 c.closing.Store(true) 411 var err error 412 413 // First write a "close" message to the child so it can exit nicely 414 // and clean up if it wants. Only after that exchange do we cancel 415 // the context that kills the process. 416 if c.can[cmdClose] { 417 _, err = c.send(c.ctx, &ProgRequest{Command: cmdClose}) 418 } 419 c.ctxCancel() 420 <-c.readLoopDone 421 return err 422 } 423 424 func (c *ProgCache) FuzzDir() string { 425 // TODO(bradfitz): figure out what to do here. For now just use the 426 // disk-based default. 427 return c.fuzzDirCache.FuzzDir() 428 }