golang.org/x/playground@v0.0.0-20230418134305-14ebe15bcd59/internal/gcpdial/gcpdial.go (about) 1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package gcpdial monitors VM instance groups to let frontends dial 6 // them directly without going through an internal load balancer. 7 package gcpdial 8 9 import ( 10 "context" 11 "fmt" 12 "io" 13 "log" 14 "math/rand" 15 "net/http" 16 "strings" 17 "sync" 18 "time" 19 20 "google.golang.org/api/compute/v1" 21 ) 22 23 type Dialer struct { 24 lister instanceLister 25 26 mu sync.Mutex 27 lastInstances []string // URLs of instances 28 prober map[string]*prober // URL of instance to its prober 29 ready map[string]string // URL of instance to ready IP 30 } 31 32 type prober struct { 33 d *Dialer 34 instURL string 35 cancel func() // called by Dialer to shut down this dialer 36 ctx context.Context // context that's canceled from above 37 38 pi *parsedInstance 39 40 // owned by the probeLoop goroutine: 41 ip string 42 healthy bool 43 } 44 45 func newProber(d *Dialer, instURL string) *prober { 46 ctx, cancel := context.WithCancel(context.Background()) 47 return &prober{ 48 d: d, 49 instURL: instURL, 50 cancel: cancel, 51 ctx: ctx, 52 } 53 } 54 55 func (p *prober) probeLoop() { 56 log.Printf("start prober for %s", p.instURL) 57 defer log.Printf("end prober for %s", p.instURL) 58 59 pi, err := parseInstance(p.instURL) 60 if err != nil { 61 log.Printf("gcpdial: prober %s: failed to parse: %v", p.instURL, err) 62 return 63 } 64 p.pi = pi 65 66 t := time.NewTicker(15 * time.Second) 67 defer t.Stop() 68 for { 69 p.probe() 70 select { 71 case <-p.ctx.Done(): 72 return 73 case <-t.C: 74 } 75 } 76 } 77 78 func (p *prober) probe() { 79 if p.ip == "" && !p.getIP() { 80 return 81 } 82 ctx, cancel := context.WithTimeout(p.ctx, 30*time.Second) 83 defer cancel() 84 req, err := http.NewRequest("GET", "http://"+p.ip+"/healthz", nil) 85 if err != nil { 86 log.Printf("gcpdial: prober %s: NewRequest: %v", p.instURL, err) 87 return 88 } 89 req = req.WithContext(ctx) 90 res, err := http.DefaultClient.Do(req) 91 if res != nil { 92 defer res.Body.Close() 93 defer io.Copy(io.Discard, res.Body) 94 } 95 healthy := err == nil && res.StatusCode == http.StatusOK 96 if healthy == p.healthy { 97 // No change. 98 return 99 } 100 p.healthy = healthy 101 102 p.d.mu.Lock() 103 defer p.d.mu.Unlock() 104 if healthy { 105 if p.d.ready == nil { 106 p.d.ready = map[string]string{} 107 } 108 p.d.ready[p.instURL] = p.ip 109 // TODO: possible optimization: trigger 110 // Dialer.PickIP waiters to wake up rather 111 // than them polling once a second. 112 } else { 113 delete(p.d.ready, p.instURL) 114 var why string 115 if err != nil { 116 why = err.Error() 117 } else { 118 why = res.Status 119 } 120 log.Printf("gcpdial: prober %s: no longer healthy; %v", p.instURL, why) 121 } 122 } 123 124 // getIP populates p.ip and reports whether it did so. 125 func (p *prober) getIP() bool { 126 if p.ip != "" { 127 return true 128 } 129 ctx, cancel := context.WithTimeout(p.ctx, 30*time.Second) 130 defer cancel() 131 svc, err := compute.NewService(ctx) 132 if err != nil { 133 log.Printf("gcpdial: prober %s: NewService: %v", p.instURL, err) 134 return false 135 } 136 inst, err := svc.Instances.Get(p.pi.Project, p.pi.Zone, p.pi.Name).Context(ctx).Do() 137 if err != nil { 138 log.Printf("gcpdial: prober %s: Get: %v", p.instURL, err) 139 return false 140 } 141 var ip string 142 var other []string 143 for _, ni := range inst.NetworkInterfaces { 144 if strings.HasPrefix(ni.NetworkIP, "10.") { 145 ip = ni.NetworkIP 146 } else { 147 other = append(other, ni.NetworkIP) 148 } 149 } 150 if ip == "" { 151 log.Printf("gcpdial: prober %s: didn't find 10.x.x.x IP; found %q", p.instURL, other) 152 return false 153 } 154 p.ip = ip 155 return true 156 } 157 158 // PickIP returns a randomly healthy IP, waiting until one is available, or until ctx expires. 159 func (d *Dialer) PickIP(ctx context.Context) (ip string, err error) { 160 for { 161 if ip, ok := d.pickIP(); ok { 162 return ip, nil 163 } 164 select { 165 case <-ctx.Done(): 166 return "", ctx.Err() 167 case <-time.After(time.Second): 168 } 169 } 170 } 171 172 func (d *Dialer) pickIP() (string, bool) { 173 d.mu.Lock() 174 defer d.mu.Unlock() 175 if len(d.ready) == 0 { 176 return "", false 177 } 178 num := rand.Intn(len(d.ready)) 179 for _, v := range d.ready { 180 if num > 0 { 181 num-- 182 continue 183 } 184 return v, true 185 } 186 panic("not reachable") 187 } 188 189 func (d *Dialer) poll() { 190 // TODO(golang.org/issue/38315) - Plumb a context in here correctly 191 ctx := context.TODO() 192 t := time.NewTicker(10 * time.Second) 193 defer t.Stop() 194 for { 195 d.pollOnce(ctx) 196 select { 197 case <-ctx.Done(): 198 return 199 case <-t.C: 200 } 201 } 202 } 203 204 func (d *Dialer) pollOnce(ctx context.Context) { 205 ctx, cancel := context.WithTimeout(ctx, 30*time.Second) 206 res, err := d.lister.ListInstances(ctx) 207 cancel() 208 if err != nil { 209 log.Printf("gcpdial: polling %v: %v", d.lister, err) 210 return 211 } 212 213 want := map[string]bool{} // the res []string turned into a set 214 for _, instURL := range res { 215 want[instURL] = true 216 } 217 218 d.mu.Lock() 219 defer d.mu.Unlock() 220 // Stop + remove any health check probers that no longer appear in the 221 // instance group. 222 for instURL, prober := range d.prober { 223 if !want[instURL] { 224 prober.cancel() 225 delete(d.prober, instURL) 226 } 227 } 228 // And start any new health check probers that are newly added 229 // (or newly known at least) to the instance group. 230 for _, instURL := range res { 231 if _, ok := d.prober[instURL]; ok { 232 continue 233 } 234 p := newProber(d, instURL) 235 go p.probeLoop() 236 if d.prober == nil { 237 d.prober = map[string]*prober{} 238 } 239 d.prober[instURL] = p 240 } 241 d.lastInstances = res 242 } 243 244 // NewRegionInstanceGroupDialer returns a new dialer that dials named 245 // regional instance group in the provided project and region. 246 // 247 // It begins polling immediately, and there's no way to stop it. 248 // (Until we need one) 249 func NewRegionInstanceGroupDialer(project, region, group string) *Dialer { 250 d := &Dialer{ 251 lister: regionInstanceGroupLister{project, region, group}, 252 } 253 go d.poll() 254 return d 255 } 256 257 // instanceLister is something that can list the current set of VMs. 258 // 259 // The idea is that we'll have both zonal and regional instance group listers, 260 // but currently we only have regionInstanceGroupLister below. 261 type instanceLister interface { 262 // ListInstances returns a list of instances in their API URL form. 263 // 264 // The API URL form is parseable by the parseInstance func. See its docs. 265 ListInstances(context.Context) ([]string, error) 266 } 267 268 // regionInstanceGroupLister is an instanceLister implementation that watches a regional 269 // instance group for changes to its set of VMs. 270 type regionInstanceGroupLister struct { 271 project, region, group string 272 } 273 274 func (rig regionInstanceGroupLister) ListInstances(ctx context.Context) (ret []string, err error) { 275 svc, err := compute.NewService(ctx) 276 if err != nil { 277 return nil, err 278 } 279 rigSvc := svc.RegionInstanceGroups 280 insts, err := rigSvc.ListInstances(rig.project, rig.region, rig.group, &compute.RegionInstanceGroupsListInstancesRequest{ 281 InstanceState: "RUNNING", 282 PortName: "", // all 283 }).Context(ctx).MaxResults(500).Do() 284 if err != nil { 285 return nil, err 286 } 287 // TODO: pagination for really large sets? Currently we truncate the results 288 // to the first 500 VMs, which seems like plenty for now. 289 // 500 is the maximum the API supports; see: 290 // https://pkg.go.dev/google.golang.org/api/compute/v1?tab=doc#RegionInstanceGroupsListInstancesCall.MaxResults 291 for _, it := range insts.Items { 292 ret = append(ret, it.Instance) 293 } 294 return ret, nil 295 } 296 297 // parsedInstance contains the project, zone, and name of a VM. 298 type parsedInstance struct { 299 Project, Zone, Name string 300 } 301 302 // parseInstance parses e.g. "https://www.googleapis.com/compute/v1/projects/golang-org/zones/us-central1-c/instances/playsandbox-7sj8" into its parts. 303 func parseInstance(u string) (*parsedInstance, error) { 304 const pfx = "https://www.googleapis.com/compute/v1/projects/" 305 if !strings.HasPrefix(u, pfx) { 306 return nil, fmt.Errorf("failed to parse instance %q; doesn't begin with %q", u, pfx) 307 } 308 u = u[len(pfx):] // "golang-org/zones/us-central1-c/instances/playsandbox-7sj8" 309 f := strings.Split(u, "/") 310 if len(f) != 5 || f[1] != "zones" || f[3] != "instances" { 311 return nil, fmt.Errorf("failed to parse instance %q; unexpected format", u) 312 } 313 return &parsedInstance{f[0], f[2], f[4]}, nil 314 }