golang.org/x/playground@v0.0.0-20230418134305-14ebe15bcd59/internal/gcpdial/gcpdial.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package gcpdial monitors VM instance groups to let frontends dial
     6  // them directly without going through an internal load balancer.
     7  package gcpdial
     8  
     9  import (
    10  	"context"
    11  	"fmt"
    12  	"io"
    13  	"log"
    14  	"math/rand"
    15  	"net/http"
    16  	"strings"
    17  	"sync"
    18  	"time"
    19  
    20  	"google.golang.org/api/compute/v1"
    21  )
    22  
// Dialer tracks the VM instances reported by an instanceLister, runs one
// health-check prober per instance, and hands out the IPs of instances
// that are currently healthy via PickIP.
type Dialer struct {
	lister instanceLister

	// mu is held by pollOnce, pickIP and the probers while they access
	// the fields below.
	mu            sync.Mutex
	lastInstances []string           // URLs of instances
	prober        map[string]*prober // URL of instance to its prober
	ready         map[string]string  // URL of instance to ready IP
}
    31  
// prober periodically health-checks a single instance on behalf of a
// Dialer and records in the Dialer's ready map whether that instance is
// currently healthy.
type prober struct {
	d       *Dialer
	instURL string
	cancel  func()          // called by Dialer to shut down this prober
	ctx     context.Context // context that's canceled from above

	// pi is the parsed form of instURL; it is set by probeLoop before
	// the first probe.
	pi *parsedInstance

	// owned by the probeLoop goroutine:
	// ip is the instance's 10.x.x.x address once getIP has found it, and
	// healthy is the result of the most recent probe.
	ip      string
	healthy bool
}
    44  
    45  func newProber(d *Dialer, instURL string) *prober {
    46  	ctx, cancel := context.WithCancel(context.Background())
    47  	return &prober{
    48  		d:       d,
    49  		instURL: instURL,
    50  		cancel:  cancel,
    51  		ctx:     ctx,
    52  	}
    53  }
    54  
    55  func (p *prober) probeLoop() {
    56  	log.Printf("start prober for %s", p.instURL)
    57  	defer log.Printf("end prober for %s", p.instURL)
    58  
    59  	pi, err := parseInstance(p.instURL)
    60  	if err != nil {
    61  		log.Printf("gcpdial: prober %s: failed to parse: %v", p.instURL, err)
    62  		return
    63  	}
    64  	p.pi = pi
    65  
    66  	t := time.NewTicker(15 * time.Second)
    67  	defer t.Stop()
    68  	for {
    69  		p.probe()
    70  		select {
    71  		case <-p.ctx.Done():
    72  			return
    73  		case <-t.C:
    74  		}
    75  	}
    76  }
    77  
    78  func (p *prober) probe() {
    79  	if p.ip == "" && !p.getIP() {
    80  		return
    81  	}
    82  	ctx, cancel := context.WithTimeout(p.ctx, 30*time.Second)
    83  	defer cancel()
    84  	req, err := http.NewRequest("GET", "http://"+p.ip+"/healthz", nil)
    85  	if err != nil {
    86  		log.Printf("gcpdial: prober %s: NewRequest: %v", p.instURL, err)
    87  		return
    88  	}
    89  	req = req.WithContext(ctx)
    90  	res, err := http.DefaultClient.Do(req)
    91  	if res != nil {
    92  		defer res.Body.Close()
    93  		defer io.Copy(io.Discard, res.Body)
    94  	}
    95  	healthy := err == nil && res.StatusCode == http.StatusOK
    96  	if healthy == p.healthy {
    97  		// No change.
    98  		return
    99  	}
   100  	p.healthy = healthy
   101  
   102  	p.d.mu.Lock()
   103  	defer p.d.mu.Unlock()
   104  	if healthy {
   105  		if p.d.ready == nil {
   106  			p.d.ready = map[string]string{}
   107  		}
   108  		p.d.ready[p.instURL] = p.ip
   109  		// TODO: possible optimization: trigger
   110  		// Dialer.PickIP waiters to wake up rather
   111  		// than them polling once a second.
   112  	} else {
   113  		delete(p.d.ready, p.instURL)
   114  		var why string
   115  		if err != nil {
   116  			why = err.Error()
   117  		} else {
   118  			why = res.Status
   119  		}
   120  		log.Printf("gcpdial: prober %s: no longer healthy; %v", p.instURL, why)
   121  	}
   122  }
   123  
   124  // getIP populates p.ip and reports whether it did so.
   125  func (p *prober) getIP() bool {
   126  	if p.ip != "" {
   127  		return true
   128  	}
   129  	ctx, cancel := context.WithTimeout(p.ctx, 30*time.Second)
   130  	defer cancel()
   131  	svc, err := compute.NewService(ctx)
   132  	if err != nil {
   133  		log.Printf("gcpdial: prober %s: NewService: %v", p.instURL, err)
   134  		return false
   135  	}
   136  	inst, err := svc.Instances.Get(p.pi.Project, p.pi.Zone, p.pi.Name).Context(ctx).Do()
   137  	if err != nil {
   138  		log.Printf("gcpdial: prober %s: Get: %v", p.instURL, err)
   139  		return false
   140  	}
   141  	var ip string
   142  	var other []string
   143  	for _, ni := range inst.NetworkInterfaces {
   144  		if strings.HasPrefix(ni.NetworkIP, "10.") {
   145  			ip = ni.NetworkIP
   146  		} else {
   147  			other = append(other, ni.NetworkIP)
   148  		}
   149  	}
   150  	if ip == "" {
   151  		log.Printf("gcpdial: prober %s: didn't find 10.x.x.x IP; found %q", p.instURL, other)
   152  		return false
   153  	}
   154  	p.ip = ip
   155  	return true
   156  }
   157  
   158  // PickIP returns a randomly healthy IP, waiting until one is available, or until ctx expires.
   159  func (d *Dialer) PickIP(ctx context.Context) (ip string, err error) {
   160  	for {
   161  		if ip, ok := d.pickIP(); ok {
   162  			return ip, nil
   163  		}
   164  		select {
   165  		case <-ctx.Done():
   166  			return "", ctx.Err()
   167  		case <-time.After(time.Second):
   168  		}
   169  	}
   170  }
   171  
   172  func (d *Dialer) pickIP() (string, bool) {
   173  	d.mu.Lock()
   174  	defer d.mu.Unlock()
   175  	if len(d.ready) == 0 {
   176  		return "", false
   177  	}
   178  	num := rand.Intn(len(d.ready))
   179  	for _, v := range d.ready {
   180  		if num > 0 {
   181  			num--
   182  			continue
   183  		}
   184  		return v, true
   185  	}
   186  	panic("not reachable")
   187  }
   188  
   189  func (d *Dialer) poll() {
   190  	// TODO(golang.org/issue/38315) - Plumb a context in here correctly
   191  	ctx := context.TODO()
   192  	t := time.NewTicker(10 * time.Second)
   193  	defer t.Stop()
   194  	for {
   195  		d.pollOnce(ctx)
   196  		select {
   197  		case <-ctx.Done():
   198  			return
   199  		case <-t.C:
   200  		}
   201  	}
   202  }
   203  
   204  func (d *Dialer) pollOnce(ctx context.Context) {
   205  	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
   206  	res, err := d.lister.ListInstances(ctx)
   207  	cancel()
   208  	if err != nil {
   209  		log.Printf("gcpdial: polling %v: %v", d.lister, err)
   210  		return
   211  	}
   212  
   213  	want := map[string]bool{} // the res []string turned into a set
   214  	for _, instURL := range res {
   215  		want[instURL] = true
   216  	}
   217  
   218  	d.mu.Lock()
   219  	defer d.mu.Unlock()
   220  	// Stop + remove any health check probers that no longer appear in the
   221  	// instance group.
   222  	for instURL, prober := range d.prober {
   223  		if !want[instURL] {
   224  			prober.cancel()
   225  			delete(d.prober, instURL)
   226  		}
   227  	}
   228  	// And start any new health check probers that are newly added
   229  	// (or newly known at least) to the instance group.
   230  	for _, instURL := range res {
   231  		if _, ok := d.prober[instURL]; ok {
   232  			continue
   233  		}
   234  		p := newProber(d, instURL)
   235  		go p.probeLoop()
   236  		if d.prober == nil {
   237  			d.prober = map[string]*prober{}
   238  		}
   239  		d.prober[instURL] = p
   240  	}
   241  	d.lastInstances = res
   242  }
   243  
   244  // NewRegionInstanceGroupDialer returns a new dialer that dials named
   245  // regional instance group in the provided project and region.
   246  //
   247  // It begins polling immediately, and there's no way to stop it.
   248  // (Until we need one)
   249  func NewRegionInstanceGroupDialer(project, region, group string) *Dialer {
   250  	d := &Dialer{
   251  		lister: regionInstanceGroupLister{project, region, group},
   252  	}
   253  	go d.poll()
   254  	return d
   255  }
   256  
   257  // instanceLister is something that can list the current set of VMs.
   258  //
   259  // The idea is that we'll have both zonal and regional instance group listers,
   260  // but currently we only have regionInstanceGroupLister below.
   261  type instanceLister interface {
   262  	// ListInstances returns a list of instances in their API URL form.
   263  	//
   264  	// The API URL form is parseable by the parseInstance func. See its docs.
   265  	ListInstances(context.Context) ([]string, error)
   266  }
   267  
   268  // regionInstanceGroupLister is an instanceLister implementation that watches a regional
   269  // instance group for changes to its set of VMs.
   270  type regionInstanceGroupLister struct {
   271  	project, region, group string
   272  }
   273  
   274  func (rig regionInstanceGroupLister) ListInstances(ctx context.Context) (ret []string, err error) {
   275  	svc, err := compute.NewService(ctx)
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  	rigSvc := svc.RegionInstanceGroups
   280  	insts, err := rigSvc.ListInstances(rig.project, rig.region, rig.group, &compute.RegionInstanceGroupsListInstancesRequest{
   281  		InstanceState: "RUNNING",
   282  		PortName:      "", // all
   283  	}).Context(ctx).MaxResults(500).Do()
   284  	if err != nil {
   285  		return nil, err
   286  	}
   287  	// TODO: pagination for really large sets? Currently we truncate the results
   288  	// to the first 500 VMs, which seems like plenty for now.
   289  	// 500 is the maximum the API supports; see:
   290  	// https://pkg.go.dev/google.golang.org/api/compute/v1?tab=doc#RegionInstanceGroupsListInstancesCall.MaxResults
   291  	for _, it := range insts.Items {
   292  		ret = append(ret, it.Instance)
   293  	}
   294  	return ret, nil
   295  }
   296  
   297  // parsedInstance contains the project, zone, and name of a VM.
   298  type parsedInstance struct {
   299  	Project, Zone, Name string
   300  }
   301  
   302  // parseInstance parses e.g. "https://www.googleapis.com/compute/v1/projects/golang-org/zones/us-central1-c/instances/playsandbox-7sj8" into its parts.
   303  func parseInstance(u string) (*parsedInstance, error) {
   304  	const pfx = "https://www.googleapis.com/compute/v1/projects/"
   305  	if !strings.HasPrefix(u, pfx) {
   306  		return nil, fmt.Errorf("failed to parse instance %q; doesn't begin with %q", u, pfx)
   307  	}
   308  	u = u[len(pfx):] // "golang-org/zones/us-central1-c/instances/playsandbox-7sj8"
   309  	f := strings.Split(u, "/")
   310  	if len(f) != 5 || f[1] != "zones" || f[3] != "instances" {
   311  		return nil, fmt.Errorf("failed to parse instance %q; unexpected format", u)
   312  	}
   313  	return &parsedInstance{f[0], f[2], f[4]}, nil
   314  }