golang.org/x/build@v0.0.0-20240506185731-218518f32b70/internal/rendezvous/rendezvous.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package rendezvous 6 7 import ( 8 "context" 9 "fmt" 10 "log" 11 "net" 12 "net/http" 13 "sync" 14 "time" 15 16 "golang.org/x/build/buildlet" 17 "golang.org/x/build/internal" 18 "golang.org/x/build/revdial/v2" 19 "google.golang.org/api/idtoken" 20 ) 21 22 // result contains the result for a waiting instance registration. 23 type result struct { 24 bc buildlet.Client 25 err error 26 } 27 28 // entry contains the elements needed to process an instance registration. 29 type entry struct { 30 deadline time.Time 31 ch chan *result 32 } 33 34 // TokenValidator verifies if a token is valid. 35 type TokenValidator func(ctx context.Context, jwt string) bool 36 37 // Rendezvous waits for buildlets to connect, verifies they are valid instances 38 // and passes the connection to the waiting caller. 39 type Rendezvous struct { 40 mu sync.Mutex 41 42 m map[string]*entry 43 validator TokenValidator 44 } 45 46 // Option is an optional configuration setting. 47 type Option func(*Rendezvous) 48 49 // OptionValidator changes the verifier used by Rendezvous. 50 func OptionValidator(v TokenValidator) Option { 51 return func(rdv *Rendezvous) { 52 rdv.validator = v 53 } 54 } 55 56 // New creates a Rendezvous element. The context that is passed in should be non-canceled 57 // during the lifetime of the running service. 58 func New(ctx context.Context, opts ...Option) *Rendezvous { 59 rdv := &Rendezvous{ 60 m: make(map[string]*entry), 61 validator: validateLUCIIDToken, 62 } 63 for _, opt := range opts { 64 opt(rdv) 65 } 66 go internal.PeriodicallyDo(ctx, 10*time.Second, func(ctx context.Context, t time.Time) { 67 rdv.purgeExpiredRegistrations() 68 }) 69 return rdv 70 } 71 72 // purgeExpiredRegistrations will purge expired registrations. 73 func (rdv *Rendezvous) purgeExpiredRegistrations() { 74 rdv.mu.Lock() 75 for id, ent := range rdv.m { 76 if time.Now().After(ent.deadline) { 77 log.Printf("rendezvous: stopped waiting for instance=%q due to timeout", id) 78 ent.ch <- &result{err: fmt.Errorf("timed out waiting for rendezvous client=%q", id)} 79 delete(rdv.m, id) 80 } 81 } 82 rdv.mu.Unlock() 83 } 84 85 // RegisterInstance notes an instance and waits for that instance to connect to the handler. An 86 // instance must be registered before the instance can attempt to connect. If an instance does 87 // not connect before the end of the wait period, the instance will not be able to connect. 88 func (rdv *Rendezvous) RegisterInstance(ctx context.Context, id string, wait time.Duration) { 89 rdv.mu.Lock() 90 rdv.m[id] = &entry{ 91 deadline: time.Now().Add(wait), 92 ch: make(chan *result, 1), 93 } 94 rdv.mu.Unlock() 95 } 96 97 // DeregisterInstance removes the registration for an instance which has been 98 // previously registered. 99 func (rdv *Rendezvous) DeregisterInstance(ctx context.Context, id string) { 100 rdv.mu.Lock() 101 delete(rdv.m, id) 102 rdv.mu.Unlock() 103 } 104 105 // WaitForInstance waits for the registered instance to successfully connect. It waits for the 106 // lifetime of the context. If the instance is not registered or has exceeded the timeout period, 107 // it will immediately return an error. 108 func (rdv *Rendezvous) WaitForInstance(ctx context.Context, id string) (buildlet.Client, error) { 109 rdv.mu.Lock() 110 e, ok := rdv.m[id] 111 rdv.mu.Unlock() 112 if !ok { 113 return nil, fmt.Errorf("instance not found: name=%q", id) 114 } 115 select { 116 case <-ctx.Done(): 117 rdv.mu.Lock() 118 delete(rdv.m, id) 119 rdv.mu.Unlock() 120 return nil, fmt.Errorf("context timeout waiting for rendezvous client=%q: %w", id, ctx.Err()) 121 case res := <-e.ch: 122 rdv.mu.Lock() 123 delete(rdv.m, id) 124 close(e.ch) 125 rdv.mu.Unlock() 126 return res.bc, res.err 127 } 128 } 129 130 const ( 131 // HeaderID is the HTTP header used for passing the gomote ID. 132 HeaderID = "X-Go-Gomote-ID" 133 // HeaderToken is the HTTP header used for passing in the authentication token. 134 HeaderToken = "X-Go-Swarming-Auth-Token" 135 // HeaderHostname is the HTTP header used for passing in the hostname. 136 HeaderHostname = "X-Go-Hostname" 137 ) 138 139 // HandleReverse handles HTTP requests from the buildlet and passes the connection to 140 // the waiter. 141 func (rdv *Rendezvous) HandleReverse(w http.ResponseWriter, r *http.Request) { 142 if r.TLS == nil { 143 http.Error(w, "buildlet registration requires SSL", http.StatusInternalServerError) 144 return 145 } 146 var ( 147 id = r.Header.Get(HeaderID) 148 authToken = r.Header.Get(HeaderToken) 149 hostname = r.Header.Get(HeaderHostname) 150 ) 151 if hostname == "" { 152 http.Error(w, "missing X-Go-Hostname header", http.StatusBadRequest) 153 return 154 } 155 if id == "" { 156 http.Error(w, "missing X-Go-Gomote-ID header", http.StatusBadRequest) 157 return 158 } 159 if authToken == "" { 160 http.Error(w, "missing X-Go-Swarming-Auth-Token header", http.StatusBadRequest) 161 return 162 } 163 rdv.mu.Lock() 164 res, ok := rdv.m[id] 165 rdv.mu.Unlock() 166 167 if !ok { 168 http.Error(w, "not expecting buildlet client", http.StatusPreconditionFailed) 169 return 170 } 171 if !rdv.validator(r.Context(), authToken) { 172 log.Printf("rendezvous: Unable to validate authentication token id=%s", id) 173 http.Error(w, "invalid authentication Token", http.StatusPreconditionFailed) 174 return 175 } 176 hj, ok := w.(http.Hijacker) 177 if !ok { 178 http.Error(w, "webserver does not support hijacking", http.StatusHTTPVersionNotSupported) 179 return 180 } 181 conn, _, err := hj.Hijack() 182 if err != nil { 183 http.Error(w, err.Error(), http.StatusInternalServerError) 184 res.ch <- &result{err: err} 185 return 186 } 187 bc, err := connToClient(conn, hostname, "swarming_task") 188 if err != nil { 189 log.Printf("rendezvous: unable to create buildlet client: %s", err) 190 conn.Close() 191 res.ch <- &result{err: err} 192 return 193 } 194 res.ch <- &result{bc: bc} 195 } 196 197 func connToClient(conn net.Conn, hostname, hostType string) (buildlet.Client, error) { 198 if err := (&http.Response{StatusCode: http.StatusSwitchingProtocols, Proto: "HTTP/1.1"}).Write(conn); err != nil { 199 log.Printf("gomote: error writing upgrade response to reverse buildlet %s (%s) at %s: %v", hostname, hostType, conn.RemoteAddr(), err) 200 conn.Close() 201 return nil, err 202 } 203 revDialer := revdial.NewDialer(conn, "/revdial") 204 revDialerDone := revDialer.Done() 205 dialer := revDialer.Dial 206 207 client := buildlet.NewClient(conn.RemoteAddr().String(), buildlet.NoKeyPair) 208 client.SetHTTPClient(&http.Client{ 209 Transport: &http.Transport{ 210 DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { 211 return dialer(ctx) 212 }, 213 }, 214 }) 215 client.SetDialer(dialer) 216 client.SetDescription(fmt.Sprintf("reverse peer %s/%s for host type %v", hostname, conn.RemoteAddr(), hostType)) 217 218 var isDead struct { 219 sync.Mutex 220 v bool 221 } 222 client.SetOnHeartbeatFailure(func() { 223 isDead.Lock() 224 isDead.v = true 225 isDead.Unlock() 226 conn.Close() 227 }) 228 229 // If the reverse dialer (which is always reading from the 230 // conn detects that the remote went away, close the buildlet 231 // client proactively. 232 go func() { 233 <-revDialerDone 234 isDead.Lock() 235 defer isDead.Unlock() 236 if !isDead.v { 237 client.Close() 238 } 239 }() 240 tstatus := time.Now() 241 status, err := client.Status(context.Background()) 242 if err != nil { 243 log.Printf("Reverse connection %s/%s for %s did not answer status after %v: %v", 244 hostname, conn.RemoteAddr(), hostType, time.Since(tstatus), err) 245 conn.Close() 246 return nil, err 247 } 248 log.Printf("Buildlet %s/%s: %+v for %s", hostname, conn.RemoteAddr(), status, hostType) 249 return client, nil 250 } 251 252 // validateLUCIIDToken verifies that the token is valid and contains the expected fields. 253 func validateLUCIIDToken(ctx context.Context, jwt string) bool { 254 payload, err := idtoken.Validate(ctx, jwt, "https://gomote.golang.org") 255 if err != nil { 256 log.Printf("rendezvous: unable to validate authentication token: %s", err) 257 return false 258 } 259 if payload.Issuer != "https://accounts.google.com" { 260 log.Printf("rendezvous: incorrect issuer: %q", payload.Issuer) 261 return false 262 } 263 if payload.Expires+30 < time.Now().Unix() || payload.IssuedAt-30 > time.Now().Unix() { 264 log.Printf("rendezvous: Bad JWT times: expires %v, issued %v", time.Unix(payload.Expires, 0), time.Unix(payload.IssuedAt, 0)) 265 return false 266 } 267 email, ok := payload.Claims["email"] 268 if !ok || email != "coordinator-builder@golang-ci-luci.iam.gserviceaccount.com" { 269 log.Printf("rendezvous: incorrect email=%s", email) 270 return false 271 } 272 emailVerified, ok := payload.Claims["email_verified"].(bool) 273 if !ok || !emailVerified { 274 log.Printf("rendezvous: email unverified email=%s", email) 275 return false 276 } 277 return true 278 }