github.phpd.cn/thought-machine/please@v12.2.0+incompatible/src/build/worker.go

// +build !bootstrap

// Contains functions related to dispatching work to remote processes.
// Right now those processes must be on the same box because they use
// the local temporary directories, but in the future this might form
// a foundation for doing real distributed work.

package build

import (
	"encoding/binary"
	"fmt"
	"io"
	"os/exec"
	"path"
	"strings"
	"sync"

	"github.com/golang/protobuf/proto"
	"github.com/google/shlex"

	pb "build/proto/worker"
	"core"
)

// A workerServer is the structure we use to maintain information about a remote work server.
type workerServer struct {
	requests      chan *pb.BuildRequest             // requests waiting to be sent to the worker process
	responses     map[string]chan *pb.BuildResponse // response channels, keyed by rule label
	responseMutex sync.Mutex
	process       *exec.Cmd
	stderr        *stderrLogger
	closing       bool // set when we are deliberately shutting this worker down
}

// workerMap contains all the remote workers we've started so far.
var workerMap = map[string]*workerServer{}
var workerMutex sync.Mutex

// buildMaybeRemotely builds a target, either dispatching it to a remote worker if one
// is configured for it, or building it locally if not.
func buildMaybeRemotely(state *core.BuildState, target *core.BuildTarget, inputHash []byte) ([]byte, error) {
	worker, workerArgs, localCmd := workerCommandAndArgs(state, target)
	if worker == "" {
		return runBuildCommand(state, target, localCmd, inputHash)
	}
	// The scheme here is pretty minimal; remote workers currently have quite a bit less info than
	// local ones get. Over time we'll probably evolve it to add more information.
	opts, err := shlex.Split(workerArgs)
	if err != nil {
		return nil, err
	}
	log.Debug("Sending remote build request to %s; opts %s", worker, workerArgs)
	resp, err := buildRemotely(state.Config, worker, &pb.BuildRequest{
		Rule:    target.Label.String(),
		Labels:  target.Labels,
		TempDir: path.Join(core.RepoRoot, target.TmpDir()),
		Srcs:    target.AllSourcePaths(state.Graph),
		Opts:    opts,
	})
	if err != nil {
		return nil, err
	}
	out := strings.Join(resp.Messages, "\n")
	if !resp.Success {
		return nil, fmt.Errorf("Error building target %s: %s", target.Label, out)
	}
	// Okay, now we might need to do something locally too...
	if localCmd != "" {
		out2, err := runBuildCommand(state, target, localCmd, inputHash)
		return append([]byte(out+"\n"), out2...), err
	}
	return []byte(out), nil
}
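
// As a purely illustrative example (hypothetical target and values, not taken from any
// real repository), the request sent above for a target //src/core:core_proto driven by
// a persistent worker might look roughly like:
//
//	&pb.BuildRequest{
//		Rule:    "//src/core:core_proto",
//		Labels:  []string{"proto"},
//		TempDir: "/path/to/repo/plz-out/tmp/src/core/core_proto._build",
//		Srcs:    []string{"src/core/core.proto"},
//		Opts:    []string{"--some_worker_flag"},
//	}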

// buildRemotely runs a single build request against a worker and returns its response.
func buildRemotely(config *core.Configuration, worker string, req *pb.BuildRequest) (*pb.BuildResponse, error) {
	w, err := getOrStartWorker(config, worker)
	if err != nil {
		return nil, err
	}
	// Register the response channel before sending the request, so a very fast worker
	// can't reply before we're ready to receive the response.
	ch := make(chan *pb.BuildResponse, 1)
	w.responseMutex.Lock()
	w.responses[req.Rule] = ch
	w.responseMutex.Unlock()
	w.requests <- req
	return <-ch, nil
}

// getOrStartWorker either retrieves an existing worker process or starts a new one.
func getOrStartWorker(config *core.Configuration, worker string) (*workerServer, error) {
	workerMutex.Lock()
	defer workerMutex.Unlock()
	if w, present := workerMap[worker]; present {
		return w, nil
	}
	// Need to create a new process
	cmd := core.ExecCommand(worker)
	cmd.Env = core.GeneralBuildEnvironment(config)
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return nil, err
	}
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}
	stderr := &stderrLogger{}
	cmd.Stderr = stderr
	if err := cmd.Start(); err != nil {
		return nil, err
	}
	w := &workerServer{
		requests:  make(chan *pb.BuildRequest),
		responses: map[string]chan *pb.BuildResponse{},
		process:   cmd,
		stderr:    stderr,
	}
	go w.sendRequests(stdin)
	go w.readResponses(stdout)
	go w.wait()
	workerMap[worker] = w
	return w, nil
}

// sendRequests sends requests to a running worker server.
func (w *workerServer) sendRequests(stdin io.Writer) {
	for request := range w.requests {
		b, err := proto.Marshal(request)
		if err != nil { // This shouldn't really happen
			log.Error("Failed to serialise request: %s", err)
			continue
		}
		// Protos can't be streamed so we have to do our own framing: a little-endian
		// int32 length prefix followed by the marshalled proto.
		if err := binary.Write(stdin, binary.LittleEndian, int32(len(b))); err != nil {
			w.Error("Failed to write request: %s", err)
			continue
		}
		if _, err := stdin.Write(b); err != nil {
			w.Error("Failed to write request: %s", err)
		}
	}
}
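
// A worker tool on the other end of these pipes implements the mirror image of this
// framing: read a little-endian int32 length from stdin, read that many bytes of a
// BuildRequest, do the work, then write a BuildResponse back the same way. A minimal
// sketch of such a loop (illustrative only, not part of this package) might look like:
//
//	func serve(stdin io.Reader, stdout io.Writer) error {
//		var size int32
//		for {
//			if err := binary.Read(stdin, binary.LittleEndian, &size); err != nil {
//				return err // EOF here typically means the host has shut us down.
//			}
//			buf := make([]byte, size)
//			if _, err := io.ReadFull(stdin, buf); err != nil {
//				return err
//			}
//			req := pb.BuildRequest{}
//			if err := proto.Unmarshal(buf, &req); err != nil {
//				return err
//			}
//			// ... perform the build described by req, then respond:
//			b, err := proto.Marshal(&pb.BuildResponse{Rule: req.Rule, Success: true})
//			if err != nil {
//				return err
//			}
//			if err := binary.Write(stdout, binary.LittleEndian, int32(len(b))); err != nil {
//				return err
//			}
//			if _, err := stdout.Write(b); err != nil {
//				return err
//			}
//		}
//	}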

// readResponses reads the responses from a running worker server and dispatches them appropriately.
func (w *workerServer) readResponses(stdout io.Reader) {
	var size int32
	for {
		if err := binary.Read(stdout, binary.LittleEndian, &size); err != nil {
			w.Error("Failed to read response: %s", err)
			break
		}
		buf := make([]byte, size)
		// Use ReadFull; a plain Read can legitimately return fewer bytes than requested.
		if _, err := io.ReadFull(stdout, buf); err != nil {
			w.Error("Failed to read response: %s", err)
			break
		}
		response := pb.BuildResponse{}
		if err := proto.Unmarshal(buf, &response); err != nil {
			w.Error("Error unmarshaling response: %s", err)
			continue
		}
		w.responseMutex.Lock()
		ch, present := w.responses[response.Rule]
		delete(w.responses, response.Rule)
		w.responseMutex.Unlock()
		if present {
			log.Debug("Got response from remote worker for %s, success: %v", response.Rule, response.Success)
			ch <- &response
		} else {
			w.Error("Couldn't find response channel for %s", response.Rule)
		}
	}
}

// wait waits for the worker process to terminate. If it dies unexpectedly, any requests
// that are still pending are answered with the error and whatever it wrote to stderr.
func (w *workerServer) wait() {
	if err := w.process.Wait(); err != nil && !w.closing {
		log.Error("Worker process died unexpectedly: %s", err)
		w.responseMutex.Lock()
		for label, ch := range w.responses {
			ch <- &pb.BuildResponse{
				Rule:     label,
				Messages: []string{fmt.Sprintf("Worker failed: %s\n%s", err, string(w.stderr.History))},
			}
		}
		w.responseMutex.Unlock()
	}
}

func (w *workerServer) Error(msg string, args ...interface{}) {
	if !w.closing {
		log.Error(msg, args...)
	}
}

// stderrLogger is used to log any errors from our worker tools.
type stderrLogger struct {
	buffer  []byte // current, incomplete line
	History []byte // everything written so far, kept for error reporting
	// Suppress will silence any further logging messages when set.
	Suppress bool
}

// Write implements the io.Writer interface; it buffers input and logs each complete line.
func (l *stderrLogger) Write(msg []byte) (int, error) {
	l.buffer = append(l.buffer, msg...)
	if len(l.buffer) > 0 && l.buffer[len(l.buffer)-1] == '\n' {
		if !l.Suppress {
			log.Error("Error from remote worker: %s", strings.TrimSpace(string(l.buffer)))
		}
		l.History = append(l.History, l.buffer...)
		l.buffer = nil
	}
	return len(msg), nil
}
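
// For example, writes are buffered until a newline arrives and then logged as one message:
//
//	l := &stderrLogger{}
//	l.Write([]byte("compilation "))
//	l.Write([]byte("failed\n")) // logs: Error from remote worker: compilation failed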

// StopWorkers stops any running worker processes.
func StopWorkers() {
	workerMutex.Lock() // guard workerMap in case anything is still starting workers
	defer workerMutex.Unlock()
	for name, worker := range workerMap {
		log.Debug("Killing build worker %s", name)
		worker.closing = true         // suppress any error messages from worker
		worker.stderr.Suppress = true // Make sure we don't print anything as they die.
		worker.process.Process.Kill()
	}
}