github.com/google/cloudprober@v0.11.3/rds/gcp/gce_instances.go (about)

     1  // Copyright 2017-2020 The Cloudprober Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gcp
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"io/ioutil"
    22  	"math/rand"
    23  	"net"
    24  	"net/http"
    25  	neturl "net/url"
    26  	"sync"
    27  	"time"
    28  
    29  	"cloud.google.com/go/compute/metadata"
    30  	"github.com/golang/protobuf/proto"
    31  	"github.com/google/cloudprober/logger"
    32  	configpb "github.com/google/cloudprober/rds/gcp/proto"
    33  	pb "github.com/google/cloudprober/rds/proto"
    34  	"github.com/google/cloudprober/rds/server/filter"
    35  	"golang.org/x/oauth2/google"
    36  )
    37  
    38  // This is how long we wait between API calls per zone.
    39  const defaultAPICallInterval = 250 * time.Microsecond
    40  const computeScope = "https://www.googleapis.com/auth/compute.readonly"
    41  
// accessConfig holds the external address fields that we decode from a
// network interface's access configs in the API response.
type accessConfig struct {
	// NatIP is used as the public IPv4 address of the interface. It carries no
	// JSON tag; encoding/json matches object keys to field names
	// case-insensitively.
	NatIP        string
	// ExternalIpv6 is used as the public IPv6 address of the interface.
	ExternalIpv6 string `json:"externalIpv6,omitempty"`
}
    46  
// networkInterface holds the per-NIC fields that we decode from the API
// response and later use to pick an instance's IP address.
type networkInterface struct {
	// NetworkIP and Ipv6Address are the addresses returned for the DEFAULT IP
	// type (IPv4 and IPv6 respectively).
	NetworkIP   string `json:"networkIP,omitempty"`
	Ipv6Address string `json:"ipv6Address,omitempty"`

	// AliasIPRanges lists the alias ranges of the interface. Only the first
	// entry is consulted when an ALIAS IP is requested; its value may be either
	// a plain IP address or a CIDR range.
	AliasIPRanges []struct {
		IPCidrRange string `json:"ipCidrRange,omitempty"`
	} `json:"aliasIpRanges,omitempty"`

	// AccessConfigs and Ipv6AccessConfigs carry the public IPv4 and IPv6
	// addresses. Only the first entry of each is consulted when a PUBLIC IP is
	// requested.
	AccessConfigs     []accessConfig
	Ipv6AccessConfigs []accessConfig `json:"ipv6AccessConfigs,omitempty"`
}
    58  
// instanceInfo represents instance items that we fetch from the API. The
// fields have no JSON tags; encoding/json matches the response keys to the
// field names case-insensitively.
type instanceInfo struct {
	// Name is the instance name; it is used as the key in the per-zone cache.
	Name              string
	// Labels are the instance labels, returned with each resource and used for
	// label-based filtering.
	Labels            map[string]string
	// NetworkInterfaces are indexed by the NIC index from the request's IP
	// config when selecting the IP address to return.
	NetworkInterfaces []networkInterface
}
    65  
// instanceData represents objects that we store in cache.
//
// NOTE: values of this type are constructed with an unkeyed (positional)
// composite literal elsewhere in this file, so the field order matters.
type instanceData struct {
	// ii is the instance information decoded from the API response.
	ii          *instanceInfo
	// lastUpdated is the Unix time, in seconds, at which this entry was
	// fetched.
	lastUpdated int64
}
    71  
    72  /*
    73  GCEInstancesFilters defines filters supported by the gce_instances resource
    74  type.
    75   Example:
    76   filter {
    77  	 key: "name"
    78  	 value: "cloudprober.*"
    79   }
    80   filter {
    81  	 key: "labels.app"
    82  	 value: "service-a"
    83   }
    84  */
    85  var GCEInstancesFilters = struct {
    86  	RegexFilterKeys []string
    87  	LabelsFilter    bool
    88  }{
    89  	[]string{"name"},
    90  	true,
    91  }
    92  
// gceInstancesLister is a GCE instances lister. It implements a cache,
// that's populated at a regular interval by making the GCE API calls.
// Listing actually only returns the current contents of that cache.
type gceInstancesLister struct {
	project      string                 // GCP project whose instances are listed
	c            *configpb.GCEInstances // lister configuration (zone filter, re-eval interval)
	thisInstance string                 // name of the instance we are running on; excluded from the cache
	baseAPIPath  string                 // Compute API URL prefix for the project
	httpClient   *http.Client           // client passed to getURLFunc for API calls
	// getURLFunc fetches a URL and returns the response body. It is a field
	// rather than a direct call, presumably so that it can be replaced in
	// tests -- confirm against the test file.
	getURLFunc   func(client *http.Client, url string) ([]byte, error)
	l            *logger.Logger

	// mu guards namesPerScope and cachePerScope.
	mu            sync.RWMutex
	namesPerScope map[string][]string                 // "us-e1-b": ["i1", "i2"]
	cachePerScope map[string]map[string]*instanceData // "us-e1-b": {"i1": data}
}
   109  
   110  // ipV picks an IP address from an array of v4 and v6 addresses, based on the
   111  // asked IP version.
   112  // Note: we should consider moving this to a common location.
   113  func ipV(ips [2]string, ipVer pb.IPConfig_IPVersion) string {
   114  	switch ipVer {
   115  	case pb.IPConfig_IPV4:
   116  		return ips[0]
   117  	case pb.IPConfig_IPV6:
   118  		return ips[1]
   119  	default:
   120  		if ips[0] != "" {
   121  			return ips[0]
   122  		}
   123  		return ips[1]
   124  	}
   125  }
   126  
   127  func externalAddr(nic networkInterface, ipVer pb.IPConfig_IPVersion) (string, error) {
   128  	ips := [2]string{"null", "null"}
   129  	if len(nic.AccessConfigs) != 0 {
   130  		ips[0] = nic.AccessConfigs[0].NatIP
   131  	}
   132  	if len(nic.Ipv6AccessConfigs) != 0 {
   133  		ips[1] = nic.Ipv6AccessConfigs[0].ExternalIpv6
   134  	}
   135  	ip := ipV(ips, ipVer)
   136  	if ip == "null" {
   137  		return "", fmt.Errorf("no %s public IP", ipVer.String())
   138  	}
   139  	return ip, nil
   140  }
   141  
   142  func instanceIP(nis []networkInterface, ipConfig *pb.IPConfig) (string, error) {
   143  	var niIndex int
   144  	ipType := pb.IPConfig_DEFAULT
   145  	if ipConfig != nil {
   146  		niIndex = int(ipConfig.GetNicIndex())
   147  		ipType = ipConfig.GetIpType()
   148  	}
   149  
   150  	if len(nis) <= niIndex {
   151  		return "", fmt.Errorf("no network interface at index %d", niIndex)
   152  	}
   153  
   154  	ni := nis[niIndex]
   155  
   156  	switch ipType {
   157  	case pb.IPConfig_DEFAULT:
   158  		return ipV([2]string{ni.NetworkIP, ni.Ipv6Address}, ipConfig.GetIpVersion()), nil
   159  
   160  	case pb.IPConfig_PUBLIC:
   161  		return externalAddr(ni, ipConfig.GetIpVersion())
   162  
   163  	case pb.IPConfig_ALIAS:
   164  		if len(ni.AliasIPRanges) == 0 {
   165  			return "", fmt.Errorf("no alias IP for NIC(%d)", niIndex)
   166  		}
   167  		// Compute API allows specifying CIDR range as an IP address, try that first.
   168  		if cidrIP := net.ParseIP(ni.AliasIPRanges[0].IPCidrRange); cidrIP != nil {
   169  			return cidrIP.String(), nil
   170  		}
   171  
   172  		cidrIP, _, err := net.ParseCIDR(ni.AliasIPRanges[0].IPCidrRange)
   173  		if err != nil {
   174  			return "", fmt.Errorf("error geting alias IP for NIC(%d): %v", niIndex, err)
   175  		}
   176  		return cidrIP.String(), nil
   177  	}
   178  
   179  	return "", nil
   180  }
   181  
   182  // listResources returns the list of resource records, where each record
   183  // consists of an instance name and the IP address associated with it. IP address
   184  // to return is selected based on the provided ipConfig.
   185  func (il *gceInstancesLister) listResources(req *pb.ListResourcesRequest) ([]*pb.Resource, error) {
   186  	var resources []*pb.Resource
   187  
   188  	allFilters, err := filter.ParseFilters(req.GetFilter(), GCEInstancesFilters.RegexFilterKeys, "")
   189  	if err != nil {
   190  		return nil, err
   191  	}
   192  
   193  	nameFilter, labelsFilter := allFilters.RegexFilters["name"], allFilters.LabelsFilter
   194  
   195  	il.mu.RLock()
   196  	defer il.mu.RUnlock()
   197  
   198  	for zone, names := range il.namesPerScope {
   199  		cache := il.cachePerScope[zone]
   200  
   201  		for _, name := range names {
   202  			ins := cache[name].ii
   203  			if ins == nil {
   204  				il.l.Errorf("gce_instances: cached info missing for %s", name)
   205  				continue
   206  			}
   207  
   208  			if nameFilter != nil && !nameFilter.Match(name, il.l) {
   209  				continue
   210  			}
   211  			if labelsFilter != nil && !labelsFilter.Match(ins.Labels, il.l) {
   212  				continue
   213  			}
   214  
   215  			nis := ins.NetworkInterfaces
   216  			ip, err := instanceIP(nis, req.GetIpConfig())
   217  			if err != nil {
   218  				return nil, fmt.Errorf("gce_instances (instance %s): error while getting IP - %v", name, err)
   219  			}
   220  
   221  			resources = append(resources, &pb.Resource{
   222  				Name:        proto.String(name),
   223  				Ip:          proto.String(ip),
   224  				Labels:      ins.Labels,
   225  				LastUpdated: proto.Int64(cache[name].lastUpdated),
   226  				// TODO(manugarg): Add support for returning instance id as well. I want to
   227  				// implement feature parity with the current targets first and then add
   228  				// more features.
   229  			})
   230  		}
   231  	}
   232  
   233  	il.l.Infof("gce_instances.listResources: returning %d instances", len(resources))
   234  	return resources, nil
   235  }
   236  
   237  func parseZonesJSON(resp []byte) ([]string, error) {
   238  	var itemList struct {
   239  		Items []struct {
   240  			Name string
   241  		}
   242  	}
   243  
   244  	if err := json.Unmarshal(resp, &itemList); err != nil {
   245  		return nil, fmt.Errorf("error while parsing zones list result: %v", err)
   246  	}
   247  
   248  	keys := make([]string, len(itemList.Items))
   249  	for i, item := range itemList.Items {
   250  		keys[i] = item.Name
   251  	}
   252  
   253  	return keys, nil
   254  }
   255  
   256  func parseInstancesJSON(resp []byte) (keys []string, instances map[string]*instanceInfo, err error) {
   257  	var itemList struct {
   258  		Items []*instanceInfo
   259  	}
   260  
   261  	if err = json.Unmarshal(resp, &itemList); err != nil {
   262  		return
   263  	}
   264  
   265  	keys = make([]string, len(itemList.Items))
   266  	instances = make(map[string]*instanceInfo)
   267  	for i, item := range itemList.Items {
   268  		keys[i] = item.Name
   269  		instances[keys[i]] = item
   270  	}
   271  
   272  	return
   273  }
   274  
   275  func getURLWithClient(client *http.Client, url string) ([]byte, error) {
   276  	resp, err := client.Get(url)
   277  	if err != nil {
   278  		return nil, err
   279  	}
   280  
   281  	if resp.StatusCode != http.StatusOK {
   282  		return nil, fmt.Errorf("error while fetching URL %s, status: %s", url, resp.Status)
   283  	}
   284  
   285  	respBytes, err := ioutil.ReadAll(resp.Body)
   286  	if err != nil {
   287  		return nil, fmt.Errorf("error reading response body: %v", err)
   288  	}
   289  
   290  	return respBytes, nil
   291  }
   292  
   293  func (il *gceInstancesLister) expandForZone(zone string) ([]string, map[string]*instanceData, error) {
   294  	var (
   295  		names []string
   296  		cache = make(map[string]*instanceData)
   297  	)
   298  
   299  	url := fmt.Sprintf("%s/zones/%s/instances?filter=%s", il.baseAPIPath, zone, neturl.PathEscape("status eq \"RUNNING\""))
   300  	respBytes, err := il.getURLFunc(il.httpClient, url)
   301  	if err != nil {
   302  		return nil, nil, err
   303  	}
   304  
   305  	keys, instances, err := parseInstancesJSON(respBytes)
   306  	if err != nil {
   307  		return nil, nil, err
   308  	}
   309  
   310  	ts := time.Now().Unix()
   311  	for _, name := range keys {
   312  		if name == il.thisInstance {
   313  			continue
   314  		}
   315  		cache[name] = &instanceData{instances[name], ts}
   316  		names = append(names, name)
   317  	}
   318  
   319  	return names, cache, nil
   320  }
   321  
   322  // expand runs equivalent API calls as "gcloud compute instances list",
   323  // and is what is used to populate the cache.
   324  func (il *gceInstancesLister) expand(reEvalInterval time.Duration) {
   325  	il.l.Infof("gce_instances.expand: running for the project: %s", il.project)
   326  
   327  	url := il.baseAPIPath + "/zones"
   328  	if il.c.GetZoneFilter() != "" {
   329  		url = fmt.Sprintf("%s?filter=%s", url, neturl.PathEscape(il.c.GetZoneFilter()))
   330  	}
   331  
   332  	respBytes, err := il.getURLFunc(il.httpClient, url)
   333  	if err != nil {
   334  		il.l.Errorf("gce_instances.expand: error while listing zones: %v", err)
   335  		return
   336  	}
   337  
   338  	zones, err := parseZonesJSON(respBytes)
   339  	if err != nil {
   340  		il.l.Errorf("gce_instances.expand: error while parsing zones list response: %v", err)
   341  		return
   342  	}
   343  
   344  	// Shuffle the zones list to change the order in each cycle.
   345  	rand.Seed(time.Now().UnixNano())
   346  	rand.Shuffle(len(zones), func(i, j int) { zones[i], zones[j] = zones[j], zones[i] })
   347  
   348  	il.l.Infof("gce_instances.expand: expanding GCE targets for %d zones", len(zones))
   349  
   350  	var numItems int
   351  
   352  	sleepBetweenZones := reEvalInterval / (2 * time.Duration(len(zones)+1))
   353  
   354  	for _, zone := range zones {
   355  		names, cache, err := il.expandForZone(zone)
   356  		if err != nil {
   357  			il.l.Errorf("gce_instances.expand: error while listing instances in zone %s: %v", zone, err)
   358  			continue
   359  		}
   360  
   361  		il.mu.Lock()
   362  		il.namesPerScope[zone] = names
   363  		il.cachePerScope[zone] = cache
   364  		il.mu.Unlock()
   365  
   366  		numItems += len(names)
   367  		time.Sleep(sleepBetweenZones)
   368  	}
   369  
   370  	il.l.Infof("gce_instances.expand: got %d instances", numItems)
   371  }
   372  
   373  func newGCEInstancesLister(project, apiVersion string, c *configpb.GCEInstances, l *logger.Logger) (*gceInstancesLister, error) {
   374  	var thisInstance string
   375  	if metadata.OnGCE() {
   376  		var err error
   377  		thisInstance, err = metadata.InstanceName()
   378  		if err != nil {
   379  			return nil, fmt.Errorf("newGCEInstancesLister: error while getting current instance name: %v", err)
   380  		}
   381  		l.Infof("newGCEInstancesLister: this instance: %s", thisInstance)
   382  	}
   383  
   384  	client, err := google.DefaultClient(context.Background(), computeScope)
   385  	if err != nil {
   386  		return nil, fmt.Errorf("error creating default HTTP OAuth client: %v", err)
   387  	}
   388  
   389  	il := &gceInstancesLister{
   390  		project:       project,
   391  		c:             c,
   392  		thisInstance:  thisInstance,
   393  		baseAPIPath:   "https://www.googleapis.com/compute/" + apiVersion + "/projects/" + project,
   394  		httpClient:    client,
   395  		getURLFunc:    getURLWithClient,
   396  		cachePerScope: make(map[string]map[string]*instanceData),
   397  		namesPerScope: make(map[string][]string),
   398  		l:             l,
   399  	}
   400  
   401  	reEvalInterval := time.Duration(c.GetReEvalSec()) * time.Second
   402  	go func() {
   403  		il.expand(0)
   404  		// Introduce a random delay between 0-reEvalInterval before
   405  		// starting the refresh loop. If there are multiple cloudprober
   406  		// gceInstances, this will make sure that each instance calls GCE
   407  		// API at a different point of time.
   408  		rand.Seed(time.Now().UnixNano())
   409  		randomDelaySec := rand.Intn(int(reEvalInterval.Seconds()))
   410  		time.Sleep(time.Duration(randomDelaySec) * time.Second)
   411  		for range time.Tick(reEvalInterval) {
   412  			il.expand(reEvalInterval)
   413  		}
   414  	}()
   415  	return il, nil
   416  }