golang.org/x/build@v0.0.0-20240506185731-218518f32b70/internal/rendezvous/rendezvous.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package rendezvous
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"log"
    11  	"net"
    12  	"net/http"
    13  	"sync"
    14  	"time"
    15  
    16  	"golang.org/x/build/buildlet"
    17  	"golang.org/x/build/internal"
    18  	"golang.org/x/build/revdial/v2"
    19  	"google.golang.org/api/idtoken"
    20  )
    21  
// result contains the result for a waiting instance registration. It is the
// value sent over an entry's channel: HandleReverse sends either a connected
// client or an error, and purgeExpiredRegistrations sends a timeout error.
type result struct {
	bc  buildlet.Client // client built from the reverse connection; left unset when err is set
	err error           // why no client could be provided; nil on success
}
    27  
// entry contains the elements needed to process an instance registration.
// Entries are created by RegisterInstance and stored in Rendezvous.m.
type entry struct {
	deadline time.Time    // after this time purgeExpiredRegistrations drops the registration
	ch       chan *result // carries the outcome to WaitForInstance; created with a buffer of one
}
    33  
// TokenValidator verifies if a token is valid. HandleReverse calls it with the
// request context and the value of the HeaderToken header, and rejects the
// connection when it returns false.
type TokenValidator func(ctx context.Context, jwt string) bool
    36  
// Rendezvous waits for buildlets to connect, verifies they are valid instances
// and passes the connection to the waiting caller.
type Rendezvous struct {
	mu sync.Mutex // held around every access to m

	m         map[string]*entry // pending registrations keyed by instance ID
	validator TokenValidator    // checks the token presented by a connecting buildlet
}
    45  
// Option is an optional configuration setting. New applies each Option to the
// Rendezvous it is constructing, after the defaults have been set.
type Option func(*Rendezvous)
    48  
    49  // OptionValidator changes the verifier used by Rendezvous.
    50  func OptionValidator(v TokenValidator) Option {
    51  	return func(rdv *Rendezvous) {
    52  		rdv.validator = v
    53  	}
    54  }
    55  
    56  // New creates a Rendezvous element. The context that is passed in should be non-canceled
    57  // during the lifetime of the running service.
    58  func New(ctx context.Context, opts ...Option) *Rendezvous {
    59  	rdv := &Rendezvous{
    60  		m:         make(map[string]*entry),
    61  		validator: validateLUCIIDToken,
    62  	}
    63  	for _, opt := range opts {
    64  		opt(rdv)
    65  	}
    66  	go internal.PeriodicallyDo(ctx, 10*time.Second, func(ctx context.Context, t time.Time) {
    67  		rdv.purgeExpiredRegistrations()
    68  	})
    69  	return rdv
    70  }
    71  
    72  // purgeExpiredRegistrations will purge expired registrations.
    73  func (rdv *Rendezvous) purgeExpiredRegistrations() {
    74  	rdv.mu.Lock()
    75  	for id, ent := range rdv.m {
    76  		if time.Now().After(ent.deadline) {
    77  			log.Printf("rendezvous: stopped waiting for instance=%q due to timeout", id)
    78  			ent.ch <- &result{err: fmt.Errorf("timed out waiting for rendezvous client=%q", id)}
    79  			delete(rdv.m, id)
    80  		}
    81  	}
    82  	rdv.mu.Unlock()
    83  }
    84  
    85  // RegisterInstance notes an instance and waits for that instance to connect to the handler. An
    86  // instance must be registered before the instance can attempt to connect. If an instance does
    87  // not connect before the end of the wait period, the instance will not be able to connect.
    88  func (rdv *Rendezvous) RegisterInstance(ctx context.Context, id string, wait time.Duration) {
    89  	rdv.mu.Lock()
    90  	rdv.m[id] = &entry{
    91  		deadline: time.Now().Add(wait),
    92  		ch:       make(chan *result, 1),
    93  	}
    94  	rdv.mu.Unlock()
    95  }
    96  
    97  // DeregisterInstance removes the registration for an instance which has been
    98  // previously registered.
    99  func (rdv *Rendezvous) DeregisterInstance(ctx context.Context, id string) {
   100  	rdv.mu.Lock()
   101  	delete(rdv.m, id)
   102  	rdv.mu.Unlock()
   103  }
   104  
   105  // WaitForInstance waits for the registered instance to successfully connect. It waits for the
   106  // lifetime of the context. If the instance is not registered or has exceeded the timeout period,
   107  // it will immediately return an error.
   108  func (rdv *Rendezvous) WaitForInstance(ctx context.Context, id string) (buildlet.Client, error) {
   109  	rdv.mu.Lock()
   110  	e, ok := rdv.m[id]
   111  	rdv.mu.Unlock()
   112  	if !ok {
   113  		return nil, fmt.Errorf("instance not found: name=%q", id)
   114  	}
   115  	select {
   116  	case <-ctx.Done():
   117  		rdv.mu.Lock()
   118  		delete(rdv.m, id)
   119  		rdv.mu.Unlock()
   120  		return nil, fmt.Errorf("context timeout waiting for rendezvous client=%q: %w", id, ctx.Err())
   121  	case res := <-e.ch:
   122  		rdv.mu.Lock()
   123  		delete(rdv.m, id)
   124  		close(e.ch)
   125  		rdv.mu.Unlock()
   126  		return res.bc, res.err
   127  	}
   128  }
   129  
// HTTP headers which HandleReverse requires on every request. A request that
// lacks any of them is rejected with a 400 status.
const (
	// HeaderID is the HTTP header used for passing the gomote ID. HandleReverse
	// uses its value to look up the pending registration.
	HeaderID = "X-Go-Gomote-ID"
	// HeaderToken is the HTTP header used for passing in the authentication token.
	// HandleReverse hands its value to the configured TokenValidator.
	HeaderToken = "X-Go-Swarming-Auth-Token"
	// HeaderHostname is the HTTP header used for passing in the hostname. It is
	// used in the description and log messages of the resulting buildlet client.
	HeaderHostname = "X-Go-Hostname"
)
   138  
   139  // HandleReverse handles HTTP requests from the buildlet and passes the connection to
   140  // the waiter.
   141  func (rdv *Rendezvous) HandleReverse(w http.ResponseWriter, r *http.Request) {
   142  	if r.TLS == nil {
   143  		http.Error(w, "buildlet registration requires SSL", http.StatusInternalServerError)
   144  		return
   145  	}
   146  	var (
   147  		id        = r.Header.Get(HeaderID)
   148  		authToken = r.Header.Get(HeaderToken)
   149  		hostname  = r.Header.Get(HeaderHostname)
   150  	)
   151  	if hostname == "" {
   152  		http.Error(w, "missing X-Go-Hostname header", http.StatusBadRequest)
   153  		return
   154  	}
   155  	if id == "" {
   156  		http.Error(w, "missing X-Go-Gomote-ID header", http.StatusBadRequest)
   157  		return
   158  	}
   159  	if authToken == "" {
   160  		http.Error(w, "missing X-Go-Swarming-Auth-Token header", http.StatusBadRequest)
   161  		return
   162  	}
   163  	rdv.mu.Lock()
   164  	res, ok := rdv.m[id]
   165  	rdv.mu.Unlock()
   166  
   167  	if !ok {
   168  		http.Error(w, "not expecting buildlet client", http.StatusPreconditionFailed)
   169  		return
   170  	}
   171  	if !rdv.validator(r.Context(), authToken) {
   172  		log.Printf("rendezvous: Unable to validate authentication token id=%s", id)
   173  		http.Error(w, "invalid authentication Token", http.StatusPreconditionFailed)
   174  		return
   175  	}
   176  	hj, ok := w.(http.Hijacker)
   177  	if !ok {
   178  		http.Error(w, "webserver does not support hijacking", http.StatusHTTPVersionNotSupported)
   179  		return
   180  	}
   181  	conn, _, err := hj.Hijack()
   182  	if err != nil {
   183  		http.Error(w, err.Error(), http.StatusInternalServerError)
   184  		res.ch <- &result{err: err}
   185  		return
   186  	}
   187  	bc, err := connToClient(conn, hostname, "swarming_task")
   188  	if err != nil {
   189  		log.Printf("rendezvous: unable to create buildlet client: %s", err)
   190  		conn.Close()
   191  		res.ch <- &result{err: err}
   192  		return
   193  	}
   194  	res.ch <- &result{bc: bc}
   195  }
   196  
   197  func connToClient(conn net.Conn, hostname, hostType string) (buildlet.Client, error) {
   198  	if err := (&http.Response{StatusCode: http.StatusSwitchingProtocols, Proto: "HTTP/1.1"}).Write(conn); err != nil {
   199  		log.Printf("gomote: error writing upgrade response to reverse buildlet %s (%s) at %s: %v", hostname, hostType, conn.RemoteAddr(), err)
   200  		conn.Close()
   201  		return nil, err
   202  	}
   203  	revDialer := revdial.NewDialer(conn, "/revdial")
   204  	revDialerDone := revDialer.Done()
   205  	dialer := revDialer.Dial
   206  
   207  	client := buildlet.NewClient(conn.RemoteAddr().String(), buildlet.NoKeyPair)
   208  	client.SetHTTPClient(&http.Client{
   209  		Transport: &http.Transport{
   210  			DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
   211  				return dialer(ctx)
   212  			},
   213  		},
   214  	})
   215  	client.SetDialer(dialer)
   216  	client.SetDescription(fmt.Sprintf("reverse peer %s/%s for host type %v", hostname, conn.RemoteAddr(), hostType))
   217  
   218  	var isDead struct {
   219  		sync.Mutex
   220  		v bool
   221  	}
   222  	client.SetOnHeartbeatFailure(func() {
   223  		isDead.Lock()
   224  		isDead.v = true
   225  		isDead.Unlock()
   226  		conn.Close()
   227  	})
   228  
   229  	// If the reverse dialer (which is always reading from the
   230  	// conn detects that the remote went away, close the buildlet
   231  	// client proactively.
   232  	go func() {
   233  		<-revDialerDone
   234  		isDead.Lock()
   235  		defer isDead.Unlock()
   236  		if !isDead.v {
   237  			client.Close()
   238  		}
   239  	}()
   240  	tstatus := time.Now()
   241  	status, err := client.Status(context.Background())
   242  	if err != nil {
   243  		log.Printf("Reverse connection %s/%s for %s did not answer status after %v: %v",
   244  			hostname, conn.RemoteAddr(), hostType, time.Since(tstatus), err)
   245  		conn.Close()
   246  		return nil, err
   247  	}
   248  	log.Printf("Buildlet %s/%s: %+v for %s", hostname, conn.RemoteAddr(), status, hostType)
   249  	return client, nil
   250  }
   251  
   252  // validateLUCIIDToken verifies that the token is valid and contains the expected fields.
   253  func validateLUCIIDToken(ctx context.Context, jwt string) bool {
   254  	payload, err := idtoken.Validate(ctx, jwt, "https://gomote.golang.org")
   255  	if err != nil {
   256  		log.Printf("rendezvous: unable to validate authentication token: %s", err)
   257  		return false
   258  	}
   259  	if payload.Issuer != "https://accounts.google.com" {
   260  		log.Printf("rendezvous: incorrect issuer: %q", payload.Issuer)
   261  		return false
   262  	}
   263  	if payload.Expires+30 < time.Now().Unix() || payload.IssuedAt-30 > time.Now().Unix() {
   264  		log.Printf("rendezvous: Bad JWT times: expires %v, issued %v", time.Unix(payload.Expires, 0), time.Unix(payload.IssuedAt, 0))
   265  		return false
   266  	}
   267  	email, ok := payload.Claims["email"]
   268  	if !ok || email != "coordinator-builder@golang-ci-luci.iam.gserviceaccount.com" {
   269  		log.Printf("rendezvous: incorrect email=%s", email)
   270  		return false
   271  	}
   272  	emailVerified, ok := payload.Claims["email_verified"].(bool)
   273  	if !ok || !emailVerified {
   274  		log.Printf("rendezvous: email unverified email=%s", email)
   275  		return false
   276  	}
   277  	return true
   278  }