github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/go/cache/prog.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cache
     6  
     7  import (
     8  	"bufio"
     9  	"context"
    10  	"crypto/sha256"
    11  	"encoding/base64"
    12  	"encoding/json"
    13  	"errors"
    14  	"fmt"
    15  	"io"
    16  	"log"
    17  	"os"
    18  	"os/exec"
    19  	"sync"
    20  	"sync/atomic"
    21  	"time"
    22  
    23  	"github.com/go-asm/go/cmd/go/base"
    24  	"github.com/go-asm/go/cmd/quoted"
    25  )
    26  
    27  // ProgCache implements Cache via JSON messages over stdin/stdout to a child
    28  // helper process which can then implement whatever caching policy/mechanism it
    29  // wants.
    30  //
    31  // See https://github.com/golang/go/issues/59719
    32  type ProgCache struct {
    33  	cmd    *exec.Cmd
    34  	stdout io.ReadCloser  // from the child process
    35  	stdin  io.WriteCloser // to the child process
    36  	bw     *bufio.Writer  // to stdin
    37  	jenc   *json.Encoder  // to bw
    38  
    39  	// can are the commands that the child process declared that it supports.
    40  	// This is effectively the versioning mechanism.
    41  	can map[ProgCmd]bool
    42  
    43  	// fuzzDirCache is another Cache implementation to use for the FuzzDir
    44  	// method. In practice this is the default GOCACHE disk-based
    45  	// implementation.
    46  	//
    47  	// TODO(bradfitz): maybe this isn't ideal. But we'd need to extend the Cache
    48  	// interface and the fuzzing callers to be less disk-y to do more here.
    49  	fuzzDirCache Cache
    50  
    51  	closing      atomic.Bool
    52  	ctx          context.Context    // valid until Close via ctxClose
    53  	ctxCancel    context.CancelFunc // called on Close
    54  	readLoopDone chan struct{}      // closed when readLoop returns
    55  
    56  	mu         sync.Mutex // guards following fields
    57  	nextID     int64
    58  	inFlight   map[int64]chan<- *ProgResponse
    59  	outputFile map[OutputID]string // object => abs path on disk
    60  
    61  	// writeMu serializes writing to the child process.
    62  	// It must never be held at the same time as mu.
    63  	writeMu sync.Mutex
    64  }
    65  
    66  // ProgCmd is a command that can be issued to a child process.
    67  //
    68  // If the interface needs to grow, we can add new commands or new versioned
    69  // commands like "get2".
    70  type ProgCmd string
    71  
    72  const (
    73  	cmdGet   = ProgCmd("get")
    74  	cmdPut   = ProgCmd("put")
    75  	cmdClose = ProgCmd("close")
    76  )
    77  
    78  // ProgRequest is the JSON-encoded message that's sent from cmd/go to
    79  // the GOCACHEPROG child process over stdin. Each JSON object is on its
    80  // own line. A ProgRequest of Type "put" with BodySize > 0 will be followed
    81  // by a line containing a base64-encoded JSON string literal of the body.
    82  type ProgRequest struct {
    83  	// ID is a unique number per process across all requests.
    84  	// It must be echoed in the ProgResponse from the child.
    85  	ID int64
    86  
    87  	// Command is the type of request.
    88  	// The cmd/go tool will only send commands that were declared
    89  	// as supported by the child.
    90  	Command ProgCmd
    91  
    92  	// ActionID is non-nil for get and puts.
    93  	ActionID []byte `json:",omitempty"` // or nil if not used
    94  
    95  	// ObjectID is set for Type "put" and "output-file".
    96  	ObjectID []byte `json:",omitempty"` // or nil if not used
    97  
    98  	// Body is the body for "put" requests. It's sent after the JSON object
    99  	// as a base64-encoded JSON string when BodySize is non-zero.
   100  	// It's sent as a separate JSON value instead of being a struct field
   101  	// send in this JSON object so large values can be streamed in both directions.
   102  	// The base64 string body of a ProgRequest will always be written
   103  	// immediately after the JSON object and a newline.
   104  	Body io.Reader `json:"-"`
   105  
   106  	// BodySize is the number of bytes of Body. If zero, the body isn't written.
   107  	BodySize int64 `json:",omitempty"`
   108  }
   109  
   110  // ProgResponse is the JSON response from the child process to cmd/go.
   111  //
   112  // With the exception of the first protocol message that the child writes to its
   113  // stdout with ID==0 and KnownCommands populated, these are only sent in
   114  // response to a ProgRequest from cmd/go.
   115  //
   116  // ProgResponses can be sent in any order. The ID must match the request they're
   117  // replying to.
   118  type ProgResponse struct {
   119  	ID  int64  // that corresponds to ProgRequest; they can be answered out of order
   120  	Err string `json:",omitempty"` // if non-empty, the error
   121  
   122  	// KnownCommands is included in the first message that cache helper program
   123  	// writes to stdout on startup (with ID==0). It includes the
   124  	// ProgRequest.Command types that are supported by the program.
   125  	//
   126  	// This lets us extend the protocol gracefully over time (adding "get2",
   127  	// etc), or fail gracefully when needed. It also lets us verify the program
   128  	// wants to be a cache helper.
   129  	KnownCommands []ProgCmd `json:",omitempty"`
   130  
   131  	// For Get requests.
   132  
   133  	Miss     bool       `json:",omitempty"` // cache miss
   134  	OutputID []byte     `json:",omitempty"`
   135  	Size     int64      `json:",omitempty"` // in bytes
   136  	Time     *time.Time `json:",omitempty"` // an Entry.Time; when the object was added to the docs
   137  
   138  	// DiskPath is the absolute path on disk of the ObjectID corresponding
   139  	// a "get" request's ActionID (on cache hit) or a "put" request's
   140  	// provided ObjectID.
   141  	DiskPath string `json:",omitempty"`
   142  }
   143  
   144  // startCacheProg starts the prog binary (with optional space-separated flags)
   145  // and returns a Cache implementation that talks to it.
   146  //
   147  // It blocks a few seconds to wait for the child process to successfully start
   148  // and advertise its capabilities.
   149  func startCacheProg(progAndArgs string, fuzzDirCache Cache) Cache {
   150  	if fuzzDirCache == nil {
   151  		panic("missing fuzzDirCache")
   152  	}
   153  	args, err := quoted.Split(progAndArgs)
   154  	if err != nil {
   155  		base.Fatalf("GOCACHEPROG args: %v", err)
   156  	}
   157  	var prog string
   158  	if len(args) > 0 {
   159  		prog = args[0]
   160  		args = args[1:]
   161  	}
   162  
   163  	ctx, ctxCancel := context.WithCancel(context.Background())
   164  
   165  	cmd := exec.CommandContext(ctx, prog, args...)
   166  	out, err := cmd.StdoutPipe()
   167  	if err != nil {
   168  		base.Fatalf("StdoutPipe to GOCACHEPROG: %v", err)
   169  	}
   170  	in, err := cmd.StdinPipe()
   171  	if err != nil {
   172  		base.Fatalf("StdinPipe to GOCACHEPROG: %v", err)
   173  	}
   174  	cmd.Stderr = os.Stderr
   175  	cmd.Cancel = in.Close
   176  
   177  	if err := cmd.Start(); err != nil {
   178  		base.Fatalf("error starting GOCACHEPROG program %q: %v", prog, err)
   179  	}
   180  
   181  	pc := &ProgCache{
   182  		ctx:          ctx,
   183  		ctxCancel:    ctxCancel,
   184  		fuzzDirCache: fuzzDirCache,
   185  		cmd:          cmd,
   186  		stdout:       out,
   187  		stdin:        in,
   188  		bw:           bufio.NewWriter(in),
   189  		inFlight:     make(map[int64]chan<- *ProgResponse),
   190  		outputFile:   make(map[OutputID]string),
   191  		readLoopDone: make(chan struct{}),
   192  	}
   193  
   194  	// Register our interest in the initial protocol message from the child to
   195  	// us, saying what it can do.
   196  	capResc := make(chan *ProgResponse, 1)
   197  	pc.inFlight[0] = capResc
   198  
   199  	pc.jenc = json.NewEncoder(pc.bw)
   200  	go pc.readLoop(pc.readLoopDone)
   201  
   202  	// Give the child process a few seconds to report its capabilities. This
   203  	// should be instant and not require any slow work by the program.
   204  	timer := time.NewTicker(5 * time.Second)
   205  	defer timer.Stop()
   206  	for {
   207  		select {
   208  		case <-timer.C:
   209  			log.Printf("# still waiting for GOCACHEPROG %v ...", prog)
   210  		case capRes := <-capResc:
   211  			can := map[ProgCmd]bool{}
   212  			for _, cmd := range capRes.KnownCommands {
   213  				can[cmd] = true
   214  			}
   215  			if len(can) == 0 {
   216  				base.Fatalf("GOCACHEPROG %v declared no supported commands", prog)
   217  			}
   218  			pc.can = can
   219  			return pc
   220  		}
   221  	}
   222  }
   223  
   224  func (c *ProgCache) readLoop(readLoopDone chan<- struct{}) {
   225  	defer close(readLoopDone)
   226  	jd := json.NewDecoder(c.stdout)
   227  	for {
   228  		res := new(ProgResponse)
   229  		if err := jd.Decode(res); err != nil {
   230  			if c.closing.Load() {
   231  				return // quietly
   232  			}
   233  			if err == io.EOF {
   234  				c.mu.Lock()
   235  				inFlight := len(c.inFlight)
   236  				c.mu.Unlock()
   237  				base.Fatalf("GOCACHEPROG exited pre-Close with %v pending requests", inFlight)
   238  			}
   239  			base.Fatalf("error reading JSON from GOCACHEPROG: %v", err)
   240  		}
   241  		c.mu.Lock()
   242  		ch, ok := c.inFlight[res.ID]
   243  		delete(c.inFlight, res.ID)
   244  		c.mu.Unlock()
   245  		if ok {
   246  			ch <- res
   247  		} else {
   248  			base.Fatalf("GOCACHEPROG sent response for unknown request ID %v", res.ID)
   249  		}
   250  	}
   251  }
   252  
   253  func (c *ProgCache) send(ctx context.Context, req *ProgRequest) (*ProgResponse, error) {
   254  	resc := make(chan *ProgResponse, 1)
   255  	if err := c.writeToChild(req, resc); err != nil {
   256  		return nil, err
   257  	}
   258  	select {
   259  	case res := <-resc:
   260  		if res.Err != "" {
   261  			return nil, errors.New(res.Err)
   262  		}
   263  		return res, nil
   264  	case <-ctx.Done():
   265  		return nil, ctx.Err()
   266  	}
   267  }
   268  
   269  func (c *ProgCache) writeToChild(req *ProgRequest, resc chan<- *ProgResponse) (err error) {
   270  	c.mu.Lock()
   271  	c.nextID++
   272  	req.ID = c.nextID
   273  	c.inFlight[req.ID] = resc
   274  	c.mu.Unlock()
   275  
   276  	defer func() {
   277  		if err != nil {
   278  			c.mu.Lock()
   279  			delete(c.inFlight, req.ID)
   280  			c.mu.Unlock()
   281  		}
   282  	}()
   283  
   284  	c.writeMu.Lock()
   285  	defer c.writeMu.Unlock()
   286  
   287  	if err := c.jenc.Encode(req); err != nil {
   288  		return err
   289  	}
   290  	if err := c.bw.WriteByte('\n'); err != nil {
   291  		return err
   292  	}
   293  	if req.Body != nil && req.BodySize > 0 {
   294  		if err := c.bw.WriteByte('"'); err != nil {
   295  			return err
   296  		}
   297  		e := base64.NewEncoder(base64.StdEncoding, c.bw)
   298  		wrote, err := io.Copy(e, req.Body)
   299  		if err != nil {
   300  			return err
   301  		}
   302  		if err := e.Close(); err != nil {
   303  			return nil
   304  		}
   305  		if wrote != req.BodySize {
   306  			return fmt.Errorf("short write writing body to GOCACHEPROG for action %x, object %x: wrote %v; expected %v",
   307  				req.ActionID, req.ObjectID, wrote, req.BodySize)
   308  		}
   309  		if _, err := c.bw.WriteString("\"\n"); err != nil {
   310  			return err
   311  		}
   312  	}
   313  	if err := c.bw.Flush(); err != nil {
   314  		return err
   315  	}
   316  	return nil
   317  }
   318  
   319  func (c *ProgCache) Get(a ActionID) (Entry, error) {
   320  	if !c.can[cmdGet] {
   321  		// They can't do a "get". Maybe they're a write-only cache.
   322  		//
   323  		// TODO(bradfitz,bcmills): figure out the proper error type here. Maybe
   324  		// errors.ErrUnsupported? Is entryNotFoundError even appropriate? There
   325  		// might be places where we rely on the fact that a recent Put can be
   326  		// read through a corresponding Get. Audit callers and check, and document
   327  		// error types on the Cache interface.
   328  		return Entry{}, &entryNotFoundError{}
   329  	}
   330  	res, err := c.send(c.ctx, &ProgRequest{
   331  		Command:  cmdGet,
   332  		ActionID: a[:],
   333  	})
   334  	if err != nil {
   335  		return Entry{}, err // TODO(bradfitz): or entryNotFoundError? Audit callers.
   336  	}
   337  	if res.Miss {
   338  		return Entry{}, &entryNotFoundError{}
   339  	}
   340  	e := Entry{
   341  		Size: res.Size,
   342  	}
   343  	if res.Time != nil {
   344  		e.Time = *res.Time
   345  	} else {
   346  		e.Time = time.Now()
   347  	}
   348  	if res.DiskPath == "" {
   349  		return Entry{}, &entryNotFoundError{errors.New("GOCACHEPROG didn't populate DiskPath on get hit")}
   350  	}
   351  	if copy(e.OutputID[:], res.OutputID) != len(res.OutputID) {
   352  		return Entry{}, &entryNotFoundError{errors.New("incomplete ProgResponse OutputID")}
   353  	}
   354  	c.noteOutputFile(e.OutputID, res.DiskPath)
   355  	return e, nil
   356  }
   357  
   358  func (c *ProgCache) noteOutputFile(o OutputID, diskPath string) {
   359  	c.mu.Lock()
   360  	defer c.mu.Unlock()
   361  	c.outputFile[o] = diskPath
   362  }
   363  
   364  func (c *ProgCache) OutputFile(o OutputID) string {
   365  	c.mu.Lock()
   366  	defer c.mu.Unlock()
   367  	return c.outputFile[o]
   368  }
   369  
   370  func (c *ProgCache) Put(a ActionID, file io.ReadSeeker) (_ OutputID, size int64, _ error) {
   371  	// Compute output ID.
   372  	h := sha256.New()
   373  	if _, err := file.Seek(0, 0); err != nil {
   374  		return OutputID{}, 0, err
   375  	}
   376  	size, err := io.Copy(h, file)
   377  	if err != nil {
   378  		return OutputID{}, 0, err
   379  	}
   380  	var out OutputID
   381  	h.Sum(out[:0])
   382  
   383  	if _, err := file.Seek(0, 0); err != nil {
   384  		return OutputID{}, 0, err
   385  	}
   386  
   387  	if !c.can[cmdPut] {
   388  		// Child is a read-only cache. Do nothing.
   389  		return out, size, nil
   390  	}
   391  
   392  	res, err := c.send(c.ctx, &ProgRequest{
   393  		Command:  cmdPut,
   394  		ActionID: a[:],
   395  		ObjectID: out[:],
   396  		Body:     file,
   397  		BodySize: size,
   398  	})
   399  	if err != nil {
   400  		return OutputID{}, 0, err
   401  	}
   402  	if res.DiskPath == "" {
   403  		return OutputID{}, 0, errors.New("GOCACHEPROG didn't return DiskPath in put response")
   404  	}
   405  	c.noteOutputFile(out, res.DiskPath)
   406  	return out, size, err
   407  }
   408  
   409  func (c *ProgCache) Close() error {
   410  	c.closing.Store(true)
   411  	var err error
   412  
   413  	// First write a "close" message to the child so it can exit nicely
   414  	// and clean up if it wants. Only after that exchange do we cancel
   415  	// the context that kills the process.
   416  	if c.can[cmdClose] {
   417  		_, err = c.send(c.ctx, &ProgRequest{Command: cmdClose})
   418  	}
   419  	c.ctxCancel()
   420  	<-c.readLoopDone
   421  	return err
   422  }
   423  
   424  func (c *ProgCache) FuzzDir() string {
   425  	// TODO(bradfitz): figure out what to do here. For now just use the
   426  	// disk-based default.
   427  	return c.fuzzDirCache.FuzzDir()
   428  }