github.com/google/cloudprober@v0.11.3/rds/gcp/gce_instances.go (about) 1 // Copyright 2017-2020 The Cloudprober Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gcp 16 17 import ( 18 "context" 19 "encoding/json" 20 "fmt" 21 "io/ioutil" 22 "math/rand" 23 "net" 24 "net/http" 25 neturl "net/url" 26 "sync" 27 "time" 28 29 "cloud.google.com/go/compute/metadata" 30 "github.com/golang/protobuf/proto" 31 "github.com/google/cloudprober/logger" 32 configpb "github.com/google/cloudprober/rds/gcp/proto" 33 pb "github.com/google/cloudprober/rds/proto" 34 "github.com/google/cloudprober/rds/server/filter" 35 "golang.org/x/oauth2/google" 36 ) 37 38 // This is how long we wait between API calls per zone. 39 const defaultAPICallInterval = 250 * time.Microsecond 40 const computeScope = "https://www.googleapis.com/auth/compute.readonly" 41 42 type accessConfig struct { 43 NatIP string 44 ExternalIpv6 string `json:"externalIpv6,omitempty"` 45 } 46 47 type networkInterface struct { 48 NetworkIP string `json:"networkIP,omitempty"` 49 Ipv6Address string `json:"ipv6Address,omitempty"` 50 51 AliasIPRanges []struct { 52 IPCidrRange string `json:"ipCidrRange,omitempty"` 53 } `json:"aliasIpRanges,omitempty"` 54 55 AccessConfigs []accessConfig 56 Ipv6AccessConfigs []accessConfig `json:"ipv6AccessConfigs,omitempty"` 57 } 58 59 // instanceInfo represents instance items that we fetch from the API. 60 type instanceInfo struct { 61 Name string 62 Labels map[string]string 63 NetworkInterfaces []networkInterface 64 } 65 66 // instanceData represents objects that we store in cache. 67 type instanceData struct { 68 ii *instanceInfo 69 lastUpdated int64 70 } 71 72 /* 73 GCEInstancesFilters defines filters supported by the gce_instances resource 74 type. 75 Example: 76 filter { 77 key: "name" 78 value: "cloudprober.*" 79 } 80 filter { 81 key: "labels.app" 82 value: "service-a" 83 } 84 */ 85 var GCEInstancesFilters = struct { 86 RegexFilterKeys []string 87 LabelsFilter bool 88 }{ 89 []string{"name"}, 90 true, 91 } 92 93 // gceInstancesLister is a GCE instances lister. It implements a cache, 94 // that's populated at a regular interval by making the GCE API calls. 95 // Listing actually only returns the current contents of that cache. 96 type gceInstancesLister struct { 97 project string 98 c *configpb.GCEInstances 99 thisInstance string 100 baseAPIPath string 101 httpClient *http.Client 102 getURLFunc func(client *http.Client, url string) ([]byte, error) 103 l *logger.Logger 104 105 mu sync.RWMutex 106 namesPerScope map[string][]string // "us-e1-b": ["i1", i2"] 107 cachePerScope map[string]map[string]*instanceData // "us-e1-b": {"i1: data} 108 } 109 110 // ipV picks an IP address from an array of v4 and v6 addresses, based on the 111 // asked IP version. 112 // Note: we should consider moving this to a common location. 113 func ipV(ips [2]string, ipVer pb.IPConfig_IPVersion) string { 114 switch ipVer { 115 case pb.IPConfig_IPV4: 116 return ips[0] 117 case pb.IPConfig_IPV6: 118 return ips[1] 119 default: 120 if ips[0] != "" { 121 return ips[0] 122 } 123 return ips[1] 124 } 125 } 126 127 func externalAddr(nic networkInterface, ipVer pb.IPConfig_IPVersion) (string, error) { 128 ips := [2]string{"null", "null"} 129 if len(nic.AccessConfigs) != 0 { 130 ips[0] = nic.AccessConfigs[0].NatIP 131 } 132 if len(nic.Ipv6AccessConfigs) != 0 { 133 ips[1] = nic.Ipv6AccessConfigs[0].ExternalIpv6 134 } 135 ip := ipV(ips, ipVer) 136 if ip == "null" { 137 return "", fmt.Errorf("no %s public IP", ipVer.String()) 138 } 139 return ip, nil 140 } 141 142 func instanceIP(nis []networkInterface, ipConfig *pb.IPConfig) (string, error) { 143 var niIndex int 144 ipType := pb.IPConfig_DEFAULT 145 if ipConfig != nil { 146 niIndex = int(ipConfig.GetNicIndex()) 147 ipType = ipConfig.GetIpType() 148 } 149 150 if len(nis) <= niIndex { 151 return "", fmt.Errorf("no network interface at index %d", niIndex) 152 } 153 154 ni := nis[niIndex] 155 156 switch ipType { 157 case pb.IPConfig_DEFAULT: 158 return ipV([2]string{ni.NetworkIP, ni.Ipv6Address}, ipConfig.GetIpVersion()), nil 159 160 case pb.IPConfig_PUBLIC: 161 return externalAddr(ni, ipConfig.GetIpVersion()) 162 163 case pb.IPConfig_ALIAS: 164 if len(ni.AliasIPRanges) == 0 { 165 return "", fmt.Errorf("no alias IP for NIC(%d)", niIndex) 166 } 167 // Compute API allows specifying CIDR range as an IP address, try that first. 168 if cidrIP := net.ParseIP(ni.AliasIPRanges[0].IPCidrRange); cidrIP != nil { 169 return cidrIP.String(), nil 170 } 171 172 cidrIP, _, err := net.ParseCIDR(ni.AliasIPRanges[0].IPCidrRange) 173 if err != nil { 174 return "", fmt.Errorf("error geting alias IP for NIC(%d): %v", niIndex, err) 175 } 176 return cidrIP.String(), nil 177 } 178 179 return "", nil 180 } 181 182 // listResources returns the list of resource records, where each record 183 // consists of an instance name and the IP address associated with it. IP address 184 // to return is selected based on the provided ipConfig. 185 func (il *gceInstancesLister) listResources(req *pb.ListResourcesRequest) ([]*pb.Resource, error) { 186 var resources []*pb.Resource 187 188 allFilters, err := filter.ParseFilters(req.GetFilter(), GCEInstancesFilters.RegexFilterKeys, "") 189 if err != nil { 190 return nil, err 191 } 192 193 nameFilter, labelsFilter := allFilters.RegexFilters["name"], allFilters.LabelsFilter 194 195 il.mu.RLock() 196 defer il.mu.RUnlock() 197 198 for zone, names := range il.namesPerScope { 199 cache := il.cachePerScope[zone] 200 201 for _, name := range names { 202 ins := cache[name].ii 203 if ins == nil { 204 il.l.Errorf("gce_instances: cached info missing for %s", name) 205 continue 206 } 207 208 if nameFilter != nil && !nameFilter.Match(name, il.l) { 209 continue 210 } 211 if labelsFilter != nil && !labelsFilter.Match(ins.Labels, il.l) { 212 continue 213 } 214 215 nis := ins.NetworkInterfaces 216 ip, err := instanceIP(nis, req.GetIpConfig()) 217 if err != nil { 218 return nil, fmt.Errorf("gce_instances (instance %s): error while getting IP - %v", name, err) 219 } 220 221 resources = append(resources, &pb.Resource{ 222 Name: proto.String(name), 223 Ip: proto.String(ip), 224 Labels: ins.Labels, 225 LastUpdated: proto.Int64(cache[name].lastUpdated), 226 // TODO(manugarg): Add support for returning instance id as well. I want to 227 // implement feature parity with the current targets first and then add 228 // more features. 229 }) 230 } 231 } 232 233 il.l.Infof("gce_instances.listResources: returning %d instances", len(resources)) 234 return resources, nil 235 } 236 237 func parseZonesJSON(resp []byte) ([]string, error) { 238 var itemList struct { 239 Items []struct { 240 Name string 241 } 242 } 243 244 if err := json.Unmarshal(resp, &itemList); err != nil { 245 return nil, fmt.Errorf("error while parsing zones list result: %v", err) 246 } 247 248 keys := make([]string, len(itemList.Items)) 249 for i, item := range itemList.Items { 250 keys[i] = item.Name 251 } 252 253 return keys, nil 254 } 255 256 func parseInstancesJSON(resp []byte) (keys []string, instances map[string]*instanceInfo, err error) { 257 var itemList struct { 258 Items []*instanceInfo 259 } 260 261 if err = json.Unmarshal(resp, &itemList); err != nil { 262 return 263 } 264 265 keys = make([]string, len(itemList.Items)) 266 instances = make(map[string]*instanceInfo) 267 for i, item := range itemList.Items { 268 keys[i] = item.Name 269 instances[keys[i]] = item 270 } 271 272 return 273 } 274 275 func getURLWithClient(client *http.Client, url string) ([]byte, error) { 276 resp, err := client.Get(url) 277 if err != nil { 278 return nil, err 279 } 280 281 if resp.StatusCode != http.StatusOK { 282 return nil, fmt.Errorf("error while fetching URL %s, status: %s", url, resp.Status) 283 } 284 285 respBytes, err := ioutil.ReadAll(resp.Body) 286 if err != nil { 287 return nil, fmt.Errorf("error reading response body: %v", err) 288 } 289 290 return respBytes, nil 291 } 292 293 func (il *gceInstancesLister) expandForZone(zone string) ([]string, map[string]*instanceData, error) { 294 var ( 295 names []string 296 cache = make(map[string]*instanceData) 297 ) 298 299 url := fmt.Sprintf("%s/zones/%s/instances?filter=%s", il.baseAPIPath, zone, neturl.PathEscape("status eq \"RUNNING\"")) 300 respBytes, err := il.getURLFunc(il.httpClient, url) 301 if err != nil { 302 return nil, nil, err 303 } 304 305 keys, instances, err := parseInstancesJSON(respBytes) 306 if err != nil { 307 return nil, nil, err 308 } 309 310 ts := time.Now().Unix() 311 for _, name := range keys { 312 if name == il.thisInstance { 313 continue 314 } 315 cache[name] = &instanceData{instances[name], ts} 316 names = append(names, name) 317 } 318 319 return names, cache, nil 320 } 321 322 // expand runs equivalent API calls as "gcloud compute instances list", 323 // and is what is used to populate the cache. 324 func (il *gceInstancesLister) expand(reEvalInterval time.Duration) { 325 il.l.Infof("gce_instances.expand: running for the project: %s", il.project) 326 327 url := il.baseAPIPath + "/zones" 328 if il.c.GetZoneFilter() != "" { 329 url = fmt.Sprintf("%s?filter=%s", url, neturl.PathEscape(il.c.GetZoneFilter())) 330 } 331 332 respBytes, err := il.getURLFunc(il.httpClient, url) 333 if err != nil { 334 il.l.Errorf("gce_instances.expand: error while listing zones: %v", err) 335 return 336 } 337 338 zones, err := parseZonesJSON(respBytes) 339 if err != nil { 340 il.l.Errorf("gce_instances.expand: error while parsing zones list response: %v", err) 341 return 342 } 343 344 // Shuffle the zones list to change the order in each cycle. 345 rand.Seed(time.Now().UnixNano()) 346 rand.Shuffle(len(zones), func(i, j int) { zones[i], zones[j] = zones[j], zones[i] }) 347 348 il.l.Infof("gce_instances.expand: expanding GCE targets for %d zones", len(zones)) 349 350 var numItems int 351 352 sleepBetweenZones := reEvalInterval / (2 * time.Duration(len(zones)+1)) 353 354 for _, zone := range zones { 355 names, cache, err := il.expandForZone(zone) 356 if err != nil { 357 il.l.Errorf("gce_instances.expand: error while listing instances in zone %s: %v", zone, err) 358 continue 359 } 360 361 il.mu.Lock() 362 il.namesPerScope[zone] = names 363 il.cachePerScope[zone] = cache 364 il.mu.Unlock() 365 366 numItems += len(names) 367 time.Sleep(sleepBetweenZones) 368 } 369 370 il.l.Infof("gce_instances.expand: got %d instances", numItems) 371 } 372 373 func newGCEInstancesLister(project, apiVersion string, c *configpb.GCEInstances, l *logger.Logger) (*gceInstancesLister, error) { 374 var thisInstance string 375 if metadata.OnGCE() { 376 var err error 377 thisInstance, err = metadata.InstanceName() 378 if err != nil { 379 return nil, fmt.Errorf("newGCEInstancesLister: error while getting current instance name: %v", err) 380 } 381 l.Infof("newGCEInstancesLister: this instance: %s", thisInstance) 382 } 383 384 client, err := google.DefaultClient(context.Background(), computeScope) 385 if err != nil { 386 return nil, fmt.Errorf("error creating default HTTP OAuth client: %v", err) 387 } 388 389 il := &gceInstancesLister{ 390 project: project, 391 c: c, 392 thisInstance: thisInstance, 393 baseAPIPath: "https://www.googleapis.com/compute/" + apiVersion + "/projects/" + project, 394 httpClient: client, 395 getURLFunc: getURLWithClient, 396 cachePerScope: make(map[string]map[string]*instanceData), 397 namesPerScope: make(map[string][]string), 398 l: l, 399 } 400 401 reEvalInterval := time.Duration(c.GetReEvalSec()) * time.Second 402 go func() { 403 il.expand(0) 404 // Introduce a random delay between 0-reEvalInterval before 405 // starting the refresh loop. If there are multiple cloudprober 406 // gceInstances, this will make sure that each instance calls GCE 407 // API at a different point of time. 408 rand.Seed(time.Now().UnixNano()) 409 randomDelaySec := rand.Intn(int(reEvalInterval.Seconds())) 410 time.Sleep(time.Duration(randomDelaySec) * time.Second) 411 for range time.Tick(reEvalInterval) { 412 il.expand(reEvalInterval) 413 } 414 }() 415 return il, nil 416 }