github.com/Tyktechnologies/tyk@v2.9.5+incompatible/gateway/host_checker_manager.go (about) 1 package gateway 2 3 import ( 4 "encoding/base64" 5 "encoding/json" 6 "errors" 7 "net/http" 8 "net/url" 9 "sync" 10 "time" 11 12 uuid "github.com/satori/go.uuid" 13 "github.com/sirupsen/logrus" 14 msgpack "gopkg.in/vmihailenco/msgpack.v2" 15 16 "github.com/TykTechnologies/tyk/apidef" 17 "github.com/TykTechnologies/tyk/config" 18 "github.com/TykTechnologies/tyk/storage" 19 ) 20 21 var GlobalHostChecker HostCheckerManager 22 23 type HostCheckerManager struct { 24 Id string 25 store storage.Handler 26 checkerMu sync.Mutex 27 checker *HostUptimeChecker 28 stopLoop bool 29 pollerStarted bool 30 unhealthyHostList *sync.Map 31 currentHostList map[string]HostData 32 resetsInitiated map[string]bool 33 } 34 35 type UptimeReportData struct { 36 URL string 37 RequestTime int64 38 ResponseCode int 39 TCPError bool 40 ServerError bool 41 Day int 42 Month time.Month 43 Year int 44 Hour int 45 Minute int 46 TimeStamp time.Time 47 ExpireAt time.Time `bson:"expireAt" json:"expireAt"` 48 APIID string 49 OrgID string 50 } 51 52 func (u *UptimeReportData) SetExpiry(expiresInSeconds int64) { 53 expiry := time.Duration(expiresInSeconds) * time.Second 54 55 if expiresInSeconds == 0 { 56 // Expiry is set to 100 years 57 expiry = (24 * time.Hour) * (365 * 100) 58 } 59 60 t := time.Now() 61 t2 := t.Add(expiry) 62 u.ExpireAt = t2 63 } 64 65 const ( 66 UnHealthyHostMetaDataTargetKey = "target_url" 67 UnHealthyHostMetaDataAPIKey = "api_id" 68 UnHealthyHostMetaDataHostKey = "host_name" 69 PollerCacheKey = "PollerActiveInstanceID" 70 PoolerHostSentinelKeyPrefix = "PollerCheckerInstance:" 71 72 UptimeAnalytics_KEYNAME = "tyk-uptime-analytics" 73 ) 74 75 func (hc *HostCheckerManager) Init(store storage.Handler) { 76 hc.store = store 77 hc.unhealthyHostList = new(sync.Map) 78 hc.resetsInitiated = make(map[string]bool) 79 // Generate a new ID for ourselves 80 hc.GenerateCheckerId() 81 } 82 83 func (hc *HostCheckerManager) Start() { 84 // Start loop to check if we are active instance 85 if hc.Id != "" { 86 go hc.CheckActivePollerLoop() 87 if config.Global().UptimeTests.Config.EnableUptimeAnalytics { 88 go hc.UptimePurgeLoop() 89 } 90 } 91 } 92 93 func (hc *HostCheckerManager) GenerateCheckerId() { 94 hc.Id = uuid.NewV4().String() 95 } 96 97 func (hc *HostCheckerManager) CheckActivePollerLoop() { 98 for !hc.stopLoop { 99 // If I'm polling, lets start the loop 100 if hc.AmIPolling() { 101 if !hc.pollerStarted { 102 log.WithFields(logrus.Fields{ 103 "prefix": "host-check-mgr", 104 }).Info("Starting Poller") 105 hc.pollerStarted = true 106 hc.StartPoller() 107 } 108 } else { 109 log.WithFields(logrus.Fields{ 110 "prefix": "host-check-mgr", 111 }).Debug("New master found, no tests running") 112 if hc.pollerStarted { 113 hc.StopPoller() 114 hc.pollerStarted = false 115 } 116 } 117 118 time.Sleep(10 * time.Second) 119 } 120 log.WithFields(logrus.Fields{ 121 "prefix": "host-check-mgr", 122 }).Debug("Stopping uptime tests") 123 } 124 125 func (hc *HostCheckerManager) UptimePurgeLoop() {} 126 127 func (hc *HostCheckerManager) AmIPolling() bool { 128 if hc.store == nil { 129 log.WithFields(logrus.Fields{ 130 "prefix": "host-check-mgr", 131 }).Error("No storage instance set for uptime tests! Disabling poller...") 132 return false 133 } 134 pollerCacheKey := PollerCacheKey 135 if config.Global().UptimeTests.PollerGroup != "" { 136 pollerCacheKey = pollerCacheKey + "." + config.Global().UptimeTests.PollerGroup 137 } 138 139 activeInstance, err := hc.store.GetKey(pollerCacheKey) 140 if err != nil { 141 log.WithFields(logrus.Fields{ 142 "prefix": "host-check-mgr", 143 }).Debug("No Primary instance found, assuming control") 144 hc.store.SetKey(pollerCacheKey, hc.Id, 15) 145 return true 146 } 147 148 if activeInstance == hc.Id { 149 log.WithFields(logrus.Fields{ 150 "prefix": "host-check-mgr", 151 }).Debug("Primary instance set, I am master") 152 hc.store.SetKey(pollerCacheKey, hc.Id, 15) // Reset TTL 153 return true 154 } 155 156 log.WithFields(logrus.Fields{ 157 "prefix": "host-check-mgr", 158 }).Debug("Active Instance is: ", activeInstance) 159 log.WithFields(logrus.Fields{ 160 "prefix": "host-check-mgr", 161 }).Debug("--- I am: ", hc.Id) 162 163 return false 164 } 165 166 func (hc *HostCheckerManager) StartPoller() { 167 168 log.WithFields(logrus.Fields{ 169 "prefix": "host-check-mgr", 170 }).Debug("---> Initialising checker") 171 172 // If we are restarting, we want to retain the host list 173 hc.checkerMu.Lock() 174 if hc.checker == nil { 175 hc.checker = &HostUptimeChecker{} 176 } 177 178 hc.checker.Init(config.Global().UptimeTests.Config.CheckerPoolSize, 179 config.Global().UptimeTests.Config.FailureTriggerSampleSize, 180 config.Global().UptimeTests.Config.TimeWait, 181 hc.currentHostList, 182 hc.OnHostDown, // On failure 183 hc.OnHostBackUp, // On success 184 hc.OnHostReport) // All reports 185 186 // Start the check loop 187 log.WithFields(logrus.Fields{ 188 "prefix": "host-check-mgr", 189 }).Debug("---> Starting checker") 190 hc.checker.Start() 191 log.WithFields(logrus.Fields{ 192 "prefix": "host-check-mgr", 193 }).Debug("---> Checker started.") 194 hc.checkerMu.Unlock() 195 } 196 197 func (hc *HostCheckerManager) StopPoller() { 198 hc.checkerMu.Lock() 199 if hc.checker != nil { 200 hc.checker.Stop() 201 } 202 hc.checkerMu.Unlock() 203 } 204 205 func (hc *HostCheckerManager) getHostKey(report HostHealthReport) string { 206 return PoolerHostSentinelKeyPrefix + report.MetaData[UnHealthyHostMetaDataHostKey] 207 } 208 209 func (hc *HostCheckerManager) OnHostReport(report HostHealthReport) { 210 if config.Global().UptimeTests.Config.EnableUptimeAnalytics { 211 go hc.RecordUptimeAnalytics(report) 212 } 213 } 214 215 func (hc *HostCheckerManager) OnHostDown(report HostHealthReport) { 216 key := hc.getHostKey(report) 217 log.WithFields(logrus.Fields{ 218 "prefix": "host-check-mgr", 219 }).Debug("Update key: ", key) 220 hc.store.SetKey(key, "1", int64(hc.checker.checkTimeout*hc.checker.sampleTriggerLimit)) 221 hc.unhealthyHostList.Store(key, 1) 222 spec := getApiSpec(report.MetaData[UnHealthyHostMetaDataAPIKey]) 223 if spec == nil { 224 log.WithFields(logrus.Fields{ 225 "prefix": "host-check-mgr", 226 }).Warning("[HOST CHECKER MANAGER] Event can't fire for API that doesn't exist") 227 return 228 } 229 230 spec.FireEvent(EventHOSTDOWN, EventHostStatusMeta{ 231 EventMetaDefault: EventMetaDefault{Message: "Uptime test failed"}, 232 HostInfo: report, 233 }) 234 235 log.WithFields(logrus.Fields{ 236 "prefix": "host-check-mgr", 237 }).Warning("[HOST CHECKER MANAGER] Host is DOWN: ", report.CheckURL) 238 239 if spec.UptimeTests.Config.ServiceDiscovery.UseDiscoveryService { 240 apiID := spec.APIID 241 242 // only do this once 243 _, initiated := hc.resetsInitiated[apiID] 244 if !initiated { 245 hc.resetsInitiated[apiID] = true 246 // Lets re-check the uptime tests after x seconds 247 go func() { 248 log.WithFields(logrus.Fields{ 249 "prefix": "host-check-mgr", 250 }).Printf("[HOST CHECKER MANAGER] Resetting test host list in %v seconds for API: %v", spec.UptimeTests.Config.RecheckWait, apiID) 251 time.Sleep(time.Duration(spec.UptimeTests.Config.RecheckWait) * time.Second) 252 hc.DoServiceDiscoveryListUpdateForID(apiID) 253 delete(hc.resetsInitiated, apiID) 254 }() 255 } 256 } 257 } 258 259 func (hc *HostCheckerManager) OnHostBackUp(report HostHealthReport) { 260 key := hc.getHostKey(report) 261 log.WithFields(logrus.Fields{ 262 "prefix": "host-check-mgr", 263 }).Debug("Delete key: ", key) 264 hc.store.DeleteKey(key) 265 hc.unhealthyHostList.Delete(key) 266 spec := getApiSpec(report.MetaData[UnHealthyHostMetaDataAPIKey]) 267 if spec == nil { 268 log.WithFields(logrus.Fields{ 269 "prefix": "host-check-mgr", 270 }).Warning("[HOST CHECKER MANAGER] Event can't fire for API that doesn't exist") 271 return 272 } 273 spec.FireEvent(EventHOSTUP, EventHostStatusMeta{ 274 EventMetaDefault: EventMetaDefault{Message: "Uptime test succeeded"}, 275 HostInfo: report, 276 }) 277 278 log.WithFields(logrus.Fields{ 279 "prefix": "host-check-mgr", 280 }).Warning("[HOST CHECKER MANAGER] Host is UP: ", report.CheckURL) 281 } 282 283 func (hc *HostCheckerManager) HostDown(urlStr string) bool { 284 u, err := url.Parse(urlStr) 285 if err != nil { 286 log.WithFields(logrus.Fields{ 287 "prefix": "host-check-mgr", 288 }).Error(err) 289 } 290 291 log.WithFields(logrus.Fields{ 292 "prefix": "host-check-mgr", 293 }).Debug("Key is: ", PoolerHostSentinelKeyPrefix+u.Host) 294 295 key := PoolerHostSentinelKeyPrefix + u.Host 296 // If the node doesn't perform any uptime checks, query the storage: 297 if hc.store != nil && !hc.pollerStarted { 298 v, _ := hc.store.GetKey(key) 299 return v == "1" 300 } 301 _, ok := hc.unhealthyHostList.Load(key) 302 // Found a key, the host is down 303 return ok 304 305 } 306 307 func (hc *HostCheckerManager) PrepareTrackingHost(checkObject apidef.HostCheckObject, apiID string) (HostData, error) { 308 // Build the check URL: 309 var hostData HostData 310 u, err := url.Parse(checkObject.CheckURL) 311 if err != nil { 312 log.WithFields(logrus.Fields{ 313 "prefix": "host-check-mgr", 314 }).Error(err) 315 return hostData, err 316 } 317 318 var bodyData string 319 var bodyByteArr []byte 320 if len(checkObject.Body) > 0 { 321 bodyByteArr, err = base64.StdEncoding.DecodeString(checkObject.Body) 322 if err != nil { 323 log.WithFields(logrus.Fields{ 324 "prefix": "host-check-mgr", 325 }).Error("Failed to load blob data: ", err) 326 return hostData, err 327 } 328 bodyData = string(bodyByteArr) 329 } 330 331 hostData = HostData{ 332 CheckURL: checkObject.CheckURL, 333 MetaData: map[string]string{ 334 UnHealthyHostMetaDataTargetKey: checkObject.CheckURL, 335 UnHealthyHostMetaDataAPIKey: apiID, 336 UnHealthyHostMetaDataHostKey: u.Host, 337 }, 338 Method: checkObject.Method, 339 Protocol: checkObject.Protocol, 340 Timeout: checkObject.Timeout, 341 EnableProxyProtocol: checkObject.EnableProxyProtocol, 342 Commands: checkObject.Commands, 343 Headers: checkObject.Headers, 344 Body: bodyData, 345 } 346 347 return hostData, nil 348 } 349 350 func (hc *HostCheckerManager) UpdateTrackingList(hd []HostData) { 351 log.WithFields(logrus.Fields{ 352 "prefix": "host-check-mgr", 353 }).Debug("--- Setting tracking list up") 354 newHostList := make(map[string]HostData) 355 for _, host := range hd { 356 newHostList[host.CheckURL] = host 357 } 358 359 hc.checkerMu.Lock() 360 hc.currentHostList = newHostList 361 if hc.checker != nil { 362 log.WithFields(logrus.Fields{ 363 "prefix": "host-check-mgr", 364 }).Debug("Reset initiated") 365 hc.checker.ResetList(newHostList) 366 } 367 hc.checkerMu.Unlock() 368 } 369 370 func (hc *HostCheckerManager) UpdateTrackingListByAPIID(hd []HostData, apiId string) { 371 log.WithFields(logrus.Fields{ 372 "prefix": "host-check-mgr", 373 }).Debug("--- Setting tracking list up for ID: ", apiId) 374 newHostList := make(map[string]HostData) 375 376 hc.checkerMu.Lock() 377 for _, existingHost := range hc.currentHostList { 378 if existingHost.MetaData[UnHealthyHostMetaDataAPIKey] != apiId { 379 // Add the old check list that excludes this API 380 newHostList[existingHost.CheckURL] = existingHost 381 } 382 } 383 384 // Add the new list for this APIID: 385 for _, host := range hd { 386 newHostList[host.CheckURL] = host 387 } 388 389 hc.currentHostList = newHostList 390 if hc.checker != nil { 391 log.WithFields(logrus.Fields{ 392 "prefix": "host-check-mgr", 393 }).Debug("Reset initiated") 394 hc.checker.ResetList(newHostList) 395 } 396 hc.checkerMu.Unlock() 397 log.WithFields(logrus.Fields{ 398 "prefix": "host-check-mgr", 399 }).Info("--- Queued tracking list update for API: ", apiId) 400 } 401 402 func (hc *HostCheckerManager) ListFromService(apiID string) ([]HostData, error) { 403 spec := getApiSpec(apiID) 404 if spec == nil { 405 return nil, errors.New("API ID not found in register") 406 } 407 sd := ServiceDiscovery{} 408 sd.Init(&spec.UptimeTests.Config.ServiceDiscovery) 409 data, err := sd.Target(spec.UptimeTests.Config.ServiceDiscovery.QueryEndpoint) 410 411 if err != nil { 412 log.WithFields(logrus.Fields{ 413 "prefix": "host-check-mgr", 414 }).Error("[HOST CHECKER MANAGER] Failed to retrieve host list: ", err) 415 return nil, err 416 } 417 418 // The returned data is a string, so lets unmarshal it: 419 checkTargets := make([]apidef.HostCheckObject, 0) 420 data0, _ := data.GetIndex(0) 421 if err := json.Unmarshal([]byte(data0), &checkTargets); err != nil { 422 log.WithFields(logrus.Fields{ 423 "prefix": "host-check-mgr", 424 }).Error("[HOST CHECKER MANAGER] Decoder failed: ", err) 425 return nil, err 426 } 427 428 hostData := make([]HostData, len(checkTargets)) 429 for i, target := range checkTargets { 430 newHostDoc, err := GlobalHostChecker.PrepareTrackingHost(target, spec.APIID) 431 if err != nil { 432 log.WithFields(logrus.Fields{ 433 "prefix": "host-check-mgr", 434 }).Error("[HOST CHECKER MANAGER] failed to convert to HostData", err) 435 } else { 436 hostData[i] = newHostDoc 437 } 438 } 439 return hostData, nil 440 } 441 442 func (hc *HostCheckerManager) DoServiceDiscoveryListUpdateForID(apiID string) { 443 log.WithFields(logrus.Fields{ 444 "prefix": "host-check-mgr", 445 }).Debug("[HOST CHECKER MANAGER] Getting data from service") 446 hostData, err := hc.ListFromService(apiID) 447 if err != nil { 448 return 449 } 450 451 log.WithFields(logrus.Fields{ 452 "prefix": "host-check-mgr", 453 }).Debug("[HOST CHECKER MANAGER] Data was: \n", hostData) 454 log.WithFields(logrus.Fields{ 455 "prefix": "host-check-mgr", 456 }).Info("[HOST CHECKER MANAGER] Refreshing uptime tests from service for API: ", apiID) 457 hc.UpdateTrackingListByAPIID(hostData, apiID) 458 } 459 460 // RecordHit will store an AnalyticsRecord in Redis 461 func (hc *HostCheckerManager) RecordUptimeAnalytics(report HostHealthReport) error { 462 // If we are obfuscating API Keys, store the hashed representation (config check handled in hashing function) 463 464 spec := getApiSpec(report.MetaData[UnHealthyHostMetaDataAPIKey]) 465 orgID := "" 466 if spec != nil { 467 orgID = spec.OrgID 468 } 469 470 t := time.Now() 471 472 var serverError bool 473 if report.ResponseCode > http.StatusOK { 474 serverError = true 475 } 476 477 newAnalyticsRecord := UptimeReportData{ 478 URL: report.CheckURL, 479 RequestTime: int64(report.Latency), 480 ResponseCode: report.ResponseCode, 481 TCPError: report.IsTCPError, 482 ServerError: serverError, 483 Day: t.Day(), 484 Month: t.Month(), 485 Year: t.Year(), 486 Hour: t.Hour(), 487 Minute: t.Minute(), 488 TimeStamp: t, 489 APIID: report.MetaData[UnHealthyHostMetaDataAPIKey], 490 OrgID: orgID, 491 } 492 493 // For anlytics purposes, we need a code 494 if report.IsTCPError { 495 newAnalyticsRecord.ResponseCode = 521 496 } 497 498 newAnalyticsRecord.SetExpiry(spec.UptimeTests.Config.ExpireUptimeAnalyticsAfter) 499 500 encoded, err := msgpack.Marshal(newAnalyticsRecord) 501 502 if err != nil { 503 log.WithFields(logrus.Fields{ 504 "prefix": "host-check-mgr", 505 }).Error("Error encoding uptime data:", err) 506 return err 507 } 508 509 log.WithFields(logrus.Fields{ 510 "prefix": "host-check-mgr", 511 }).Debug("Recording uptime stat") 512 hc.store.AppendToSet(UptimeAnalytics_KEYNAME, string(encoded)) 513 return nil 514 } 515 516 func InitHostCheckManager(store storage.Handler) { 517 // Already initialized 518 if GlobalHostChecker.Id != "" { 519 return 520 } 521 522 GlobalHostChecker = HostCheckerManager{} 523 GlobalHostChecker.Init(store) 524 GlobalHostChecker.Start() 525 } 526 527 func SetCheckerHostList() { 528 log.WithFields(logrus.Fields{ 529 "prefix": "host-check-mgr", 530 }).Info("Loading uptime tests...") 531 hostList := []HostData{} 532 apisMu.RLock() 533 for _, spec := range apisByID { 534 if spec.UptimeTests.Config.ServiceDiscovery.UseDiscoveryService { 535 hostList, err := GlobalHostChecker.ListFromService(spec.APIID) 536 if err == nil { 537 hostList = append(hostList, hostList...) 538 for _, t := range hostList { 539 log.WithFields(logrus.Fields{ 540 "prefix": "host-check-mgr", 541 }).WithFields(logrus.Fields{ 542 "prefix": "host-check-mgr", 543 }).Info("---> Adding uptime test: ", t.CheckURL) 544 } 545 } 546 } else { 547 for _, checkItem := range spec.UptimeTests.CheckList { 548 newHostDoc, err := GlobalHostChecker.PrepareTrackingHost(checkItem, spec.APIID) 549 if err == nil { 550 hostList = append(hostList, newHostDoc) 551 log.WithFields(logrus.Fields{ 552 "prefix": "host-check-mgr", 553 }).Info("---> Adding uptime test: ", checkItem.CheckURL) 554 } else { 555 log.WithFields(logrus.Fields{ 556 "prefix": "host-check-mgr", 557 }).Warning("---> Adding uptime test failed: ", checkItem.CheckURL) 558 log.WithFields(logrus.Fields{ 559 "prefix": "host-check-mgr", 560 }).Warning("--------> Error was: ", err) 561 } 562 563 } 564 } 565 } 566 apisMu.RUnlock() 567 568 GlobalHostChecker.UpdateTrackingList(hostList) 569 } 570 571 /* 572 573 ## TEST CONFIGURATION 574 575 uptime_tests: { 576 check_list: [ 577 { 578 "url": "http://google.com:3000/" 579 }, 580 { 581 "url": "`+testHttpPost+`", 582 "method": "POST", 583 "headers": { 584 "this": "that", 585 "more": "beans" 586 }, 587 "body": "VEhJUyBJUyBBIEJPRFkgT0JKRUNUIFRFWFQNCg0KTW9yZSBzdHVmZiBoZXJl" 588 } 589 ] 590 }, 591 592 */