github.com/Tyktechnologies/tyk@v2.9.5+incompatible/gateway/host_checker_manager.go (about)

     1  package gateway
     2  
     3  import (
     4  	"encoding/base64"
     5  	"encoding/json"
     6  	"errors"
     7  	"net/http"
     8  	"net/url"
     9  	"sync"
    10  	"time"
    11  
    12  	uuid "github.com/satori/go.uuid"
    13  	"github.com/sirupsen/logrus"
    14  	msgpack "gopkg.in/vmihailenco/msgpack.v2"
    15  
    16  	"github.com/TykTechnologies/tyk/apidef"
    17  	"github.com/TykTechnologies/tyk/config"
    18  	"github.com/TykTechnologies/tyk/storage"
    19  )
    20  
    21  var GlobalHostChecker HostCheckerManager
    22  
    23  type HostCheckerManager struct {
    24  	Id                string
    25  	store             storage.Handler
    26  	checkerMu         sync.Mutex
    27  	checker           *HostUptimeChecker
    28  	stopLoop          bool
    29  	pollerStarted     bool
    30  	unhealthyHostList *sync.Map
    31  	currentHostList   map[string]HostData
    32  	resetsInitiated   map[string]bool
    33  }
    34  
    35  type UptimeReportData struct {
    36  	URL          string
    37  	RequestTime  int64
    38  	ResponseCode int
    39  	TCPError     bool
    40  	ServerError  bool
    41  	Day          int
    42  	Month        time.Month
    43  	Year         int
    44  	Hour         int
    45  	Minute       int
    46  	TimeStamp    time.Time
    47  	ExpireAt     time.Time `bson:"expireAt" json:"expireAt"`
    48  	APIID        string
    49  	OrgID        string
    50  }
    51  
    52  func (u *UptimeReportData) SetExpiry(expiresInSeconds int64) {
    53  	expiry := time.Duration(expiresInSeconds) * time.Second
    54  
    55  	if expiresInSeconds == 0 {
    56  		// Expiry is set to 100 years
    57  		expiry = (24 * time.Hour) * (365 * 100)
    58  	}
    59  
    60  	t := time.Now()
    61  	t2 := t.Add(expiry)
    62  	u.ExpireAt = t2
    63  }
    64  
    65  const (
    66  	UnHealthyHostMetaDataTargetKey = "target_url"
    67  	UnHealthyHostMetaDataAPIKey    = "api_id"
    68  	UnHealthyHostMetaDataHostKey   = "host_name"
    69  	PollerCacheKey                 = "PollerActiveInstanceID"
    70  	PoolerHostSentinelKeyPrefix    = "PollerCheckerInstance:"
    71  
    72  	UptimeAnalytics_KEYNAME = "tyk-uptime-analytics"
    73  )
    74  
    75  func (hc *HostCheckerManager) Init(store storage.Handler) {
    76  	hc.store = store
    77  	hc.unhealthyHostList = new(sync.Map)
    78  	hc.resetsInitiated = make(map[string]bool)
    79  	// Generate a new ID for ourselves
    80  	hc.GenerateCheckerId()
    81  }
    82  
    83  func (hc *HostCheckerManager) Start() {
    84  	// Start loop to check if we are active instance
    85  	if hc.Id != "" {
    86  		go hc.CheckActivePollerLoop()
    87  		if config.Global().UptimeTests.Config.EnableUptimeAnalytics {
    88  			go hc.UptimePurgeLoop()
    89  		}
    90  	}
    91  }
    92  
    93  func (hc *HostCheckerManager) GenerateCheckerId() {
    94  	hc.Id = uuid.NewV4().String()
    95  }
    96  
    97  func (hc *HostCheckerManager) CheckActivePollerLoop() {
    98  	for !hc.stopLoop {
    99  		// If I'm polling, lets start the loop
   100  		if hc.AmIPolling() {
   101  			if !hc.pollerStarted {
   102  				log.WithFields(logrus.Fields{
   103  					"prefix": "host-check-mgr",
   104  				}).Info("Starting Poller")
   105  				hc.pollerStarted = true
   106  				hc.StartPoller()
   107  			}
   108  		} else {
   109  			log.WithFields(logrus.Fields{
   110  				"prefix": "host-check-mgr",
   111  			}).Debug("New master found, no tests running")
   112  			if hc.pollerStarted {
   113  				hc.StopPoller()
   114  				hc.pollerStarted = false
   115  			}
   116  		}
   117  
   118  		time.Sleep(10 * time.Second)
   119  	}
   120  	log.WithFields(logrus.Fields{
   121  		"prefix": "host-check-mgr",
   122  	}).Debug("Stopping uptime tests")
   123  }
   124  
   125  func (hc *HostCheckerManager) UptimePurgeLoop() {}
   126  
   127  func (hc *HostCheckerManager) AmIPolling() bool {
   128  	if hc.store == nil {
   129  		log.WithFields(logrus.Fields{
   130  			"prefix": "host-check-mgr",
   131  		}).Error("No storage instance set for uptime tests! Disabling poller...")
   132  		return false
   133  	}
   134  	pollerCacheKey := PollerCacheKey
   135  	if config.Global().UptimeTests.PollerGroup != "" {
   136  		pollerCacheKey = pollerCacheKey + "." + config.Global().UptimeTests.PollerGroup
   137  	}
   138  
   139  	activeInstance, err := hc.store.GetKey(pollerCacheKey)
   140  	if err != nil {
   141  		log.WithFields(logrus.Fields{
   142  			"prefix": "host-check-mgr",
   143  		}).Debug("No Primary instance found, assuming control")
   144  		hc.store.SetKey(pollerCacheKey, hc.Id, 15)
   145  		return true
   146  	}
   147  
   148  	if activeInstance == hc.Id {
   149  		log.WithFields(logrus.Fields{
   150  			"prefix": "host-check-mgr",
   151  		}).Debug("Primary instance set, I am master")
   152  		hc.store.SetKey(pollerCacheKey, hc.Id, 15) // Reset TTL
   153  		return true
   154  	}
   155  
   156  	log.WithFields(logrus.Fields{
   157  		"prefix": "host-check-mgr",
   158  	}).Debug("Active Instance is: ", activeInstance)
   159  	log.WithFields(logrus.Fields{
   160  		"prefix": "host-check-mgr",
   161  	}).Debug("--- I am: ", hc.Id)
   162  
   163  	return false
   164  }
   165  
   166  func (hc *HostCheckerManager) StartPoller() {
   167  
   168  	log.WithFields(logrus.Fields{
   169  		"prefix": "host-check-mgr",
   170  	}).Debug("---> Initialising checker")
   171  
   172  	// If we are restarting, we want to retain the host list
   173  	hc.checkerMu.Lock()
   174  	if hc.checker == nil {
   175  		hc.checker = &HostUptimeChecker{}
   176  	}
   177  
   178  	hc.checker.Init(config.Global().UptimeTests.Config.CheckerPoolSize,
   179  		config.Global().UptimeTests.Config.FailureTriggerSampleSize,
   180  		config.Global().UptimeTests.Config.TimeWait,
   181  		hc.currentHostList,
   182  		hc.OnHostDown,   // On failure
   183  		hc.OnHostBackUp, // On success
   184  		hc.OnHostReport) // All reports
   185  
   186  	// Start the check loop
   187  	log.WithFields(logrus.Fields{
   188  		"prefix": "host-check-mgr",
   189  	}).Debug("---> Starting checker")
   190  	hc.checker.Start()
   191  	log.WithFields(logrus.Fields{
   192  		"prefix": "host-check-mgr",
   193  	}).Debug("---> Checker started.")
   194  	hc.checkerMu.Unlock()
   195  }
   196  
   197  func (hc *HostCheckerManager) StopPoller() {
   198  	hc.checkerMu.Lock()
   199  	if hc.checker != nil {
   200  		hc.checker.Stop()
   201  	}
   202  	hc.checkerMu.Unlock()
   203  }
   204  
   205  func (hc *HostCheckerManager) getHostKey(report HostHealthReport) string {
   206  	return PoolerHostSentinelKeyPrefix + report.MetaData[UnHealthyHostMetaDataHostKey]
   207  }
   208  
   209  func (hc *HostCheckerManager) OnHostReport(report HostHealthReport) {
   210  	if config.Global().UptimeTests.Config.EnableUptimeAnalytics {
   211  		go hc.RecordUptimeAnalytics(report)
   212  	}
   213  }
   214  
   215  func (hc *HostCheckerManager) OnHostDown(report HostHealthReport) {
   216  	key := hc.getHostKey(report)
   217  	log.WithFields(logrus.Fields{
   218  		"prefix": "host-check-mgr",
   219  	}).Debug("Update key: ", key)
   220  	hc.store.SetKey(key, "1", int64(hc.checker.checkTimeout*hc.checker.sampleTriggerLimit))
   221  	hc.unhealthyHostList.Store(key, 1)
   222  	spec := getApiSpec(report.MetaData[UnHealthyHostMetaDataAPIKey])
   223  	if spec == nil {
   224  		log.WithFields(logrus.Fields{
   225  			"prefix": "host-check-mgr",
   226  		}).Warning("[HOST CHECKER MANAGER] Event can't fire for API that doesn't exist")
   227  		return
   228  	}
   229  
   230  	spec.FireEvent(EventHOSTDOWN, EventHostStatusMeta{
   231  		EventMetaDefault: EventMetaDefault{Message: "Uptime test failed"},
   232  		HostInfo:         report,
   233  	})
   234  
   235  	log.WithFields(logrus.Fields{
   236  		"prefix": "host-check-mgr",
   237  	}).Warning("[HOST CHECKER MANAGER] Host is DOWN: ", report.CheckURL)
   238  
   239  	if spec.UptimeTests.Config.ServiceDiscovery.UseDiscoveryService {
   240  		apiID := spec.APIID
   241  
   242  		// only do this once
   243  		_, initiated := hc.resetsInitiated[apiID]
   244  		if !initiated {
   245  			hc.resetsInitiated[apiID] = true
   246  			// Lets re-check the uptime tests after x seconds
   247  			go func() {
   248  				log.WithFields(logrus.Fields{
   249  					"prefix": "host-check-mgr",
   250  				}).Printf("[HOST CHECKER MANAGER] Resetting test host list in %v seconds for API: %v", spec.UptimeTests.Config.RecheckWait, apiID)
   251  				time.Sleep(time.Duration(spec.UptimeTests.Config.RecheckWait) * time.Second)
   252  				hc.DoServiceDiscoveryListUpdateForID(apiID)
   253  				delete(hc.resetsInitiated, apiID)
   254  			}()
   255  		}
   256  	}
   257  }
   258  
   259  func (hc *HostCheckerManager) OnHostBackUp(report HostHealthReport) {
   260  	key := hc.getHostKey(report)
   261  	log.WithFields(logrus.Fields{
   262  		"prefix": "host-check-mgr",
   263  	}).Debug("Delete key: ", key)
   264  	hc.store.DeleteKey(key)
   265  	hc.unhealthyHostList.Delete(key)
   266  	spec := getApiSpec(report.MetaData[UnHealthyHostMetaDataAPIKey])
   267  	if spec == nil {
   268  		log.WithFields(logrus.Fields{
   269  			"prefix": "host-check-mgr",
   270  		}).Warning("[HOST CHECKER MANAGER] Event can't fire for API that doesn't exist")
   271  		return
   272  	}
   273  	spec.FireEvent(EventHOSTUP, EventHostStatusMeta{
   274  		EventMetaDefault: EventMetaDefault{Message: "Uptime test succeeded"},
   275  		HostInfo:         report,
   276  	})
   277  
   278  	log.WithFields(logrus.Fields{
   279  		"prefix": "host-check-mgr",
   280  	}).Warning("[HOST CHECKER MANAGER] Host is UP:   ", report.CheckURL)
   281  }
   282  
   283  func (hc *HostCheckerManager) HostDown(urlStr string) bool {
   284  	u, err := url.Parse(urlStr)
   285  	if err != nil {
   286  		log.WithFields(logrus.Fields{
   287  			"prefix": "host-check-mgr",
   288  		}).Error(err)
   289  	}
   290  
   291  	log.WithFields(logrus.Fields{
   292  		"prefix": "host-check-mgr",
   293  	}).Debug("Key is: ", PoolerHostSentinelKeyPrefix+u.Host)
   294  
   295  	key := PoolerHostSentinelKeyPrefix + u.Host
   296  	// If the node doesn't perform any uptime checks, query the storage:
   297  	if hc.store != nil && !hc.pollerStarted {
   298  		v, _ := hc.store.GetKey(key)
   299  		return v == "1"
   300  	}
   301  	_, ok := hc.unhealthyHostList.Load(key)
   302  	// Found a key, the host is down
   303  	return ok
   304  
   305  }
   306  
   307  func (hc *HostCheckerManager) PrepareTrackingHost(checkObject apidef.HostCheckObject, apiID string) (HostData, error) {
   308  	// Build the check URL:
   309  	var hostData HostData
   310  	u, err := url.Parse(checkObject.CheckURL)
   311  	if err != nil {
   312  		log.WithFields(logrus.Fields{
   313  			"prefix": "host-check-mgr",
   314  		}).Error(err)
   315  		return hostData, err
   316  	}
   317  
   318  	var bodyData string
   319  	var bodyByteArr []byte
   320  	if len(checkObject.Body) > 0 {
   321  		bodyByteArr, err = base64.StdEncoding.DecodeString(checkObject.Body)
   322  		if err != nil {
   323  			log.WithFields(logrus.Fields{
   324  				"prefix": "host-check-mgr",
   325  			}).Error("Failed to load blob data: ", err)
   326  			return hostData, err
   327  		}
   328  		bodyData = string(bodyByteArr)
   329  	}
   330  
   331  	hostData = HostData{
   332  		CheckURL: checkObject.CheckURL,
   333  		MetaData: map[string]string{
   334  			UnHealthyHostMetaDataTargetKey: checkObject.CheckURL,
   335  			UnHealthyHostMetaDataAPIKey:    apiID,
   336  			UnHealthyHostMetaDataHostKey:   u.Host,
   337  		},
   338  		Method:              checkObject.Method,
   339  		Protocol:            checkObject.Protocol,
   340  		Timeout:             checkObject.Timeout,
   341  		EnableProxyProtocol: checkObject.EnableProxyProtocol,
   342  		Commands:            checkObject.Commands,
   343  		Headers:             checkObject.Headers,
   344  		Body:                bodyData,
   345  	}
   346  
   347  	return hostData, nil
   348  }
   349  
   350  func (hc *HostCheckerManager) UpdateTrackingList(hd []HostData) {
   351  	log.WithFields(logrus.Fields{
   352  		"prefix": "host-check-mgr",
   353  	}).Debug("--- Setting tracking list up")
   354  	newHostList := make(map[string]HostData)
   355  	for _, host := range hd {
   356  		newHostList[host.CheckURL] = host
   357  	}
   358  
   359  	hc.checkerMu.Lock()
   360  	hc.currentHostList = newHostList
   361  	if hc.checker != nil {
   362  		log.WithFields(logrus.Fields{
   363  			"prefix": "host-check-mgr",
   364  		}).Debug("Reset initiated")
   365  		hc.checker.ResetList(newHostList)
   366  	}
   367  	hc.checkerMu.Unlock()
   368  }
   369  
   370  func (hc *HostCheckerManager) UpdateTrackingListByAPIID(hd []HostData, apiId string) {
   371  	log.WithFields(logrus.Fields{
   372  		"prefix": "host-check-mgr",
   373  	}).Debug("--- Setting tracking list up for ID: ", apiId)
   374  	newHostList := make(map[string]HostData)
   375  
   376  	hc.checkerMu.Lock()
   377  	for _, existingHost := range hc.currentHostList {
   378  		if existingHost.MetaData[UnHealthyHostMetaDataAPIKey] != apiId {
   379  			// Add the old check list that excludes this API
   380  			newHostList[existingHost.CheckURL] = existingHost
   381  		}
   382  	}
   383  
   384  	// Add the new list for this APIID:
   385  	for _, host := range hd {
   386  		newHostList[host.CheckURL] = host
   387  	}
   388  
   389  	hc.currentHostList = newHostList
   390  	if hc.checker != nil {
   391  		log.WithFields(logrus.Fields{
   392  			"prefix": "host-check-mgr",
   393  		}).Debug("Reset initiated")
   394  		hc.checker.ResetList(newHostList)
   395  	}
   396  	hc.checkerMu.Unlock()
   397  	log.WithFields(logrus.Fields{
   398  		"prefix": "host-check-mgr",
   399  	}).Info("--- Queued tracking list update for API: ", apiId)
   400  }
   401  
   402  func (hc *HostCheckerManager) ListFromService(apiID string) ([]HostData, error) {
   403  	spec := getApiSpec(apiID)
   404  	if spec == nil {
   405  		return nil, errors.New("API ID not found in register")
   406  	}
   407  	sd := ServiceDiscovery{}
   408  	sd.Init(&spec.UptimeTests.Config.ServiceDiscovery)
   409  	data, err := sd.Target(spec.UptimeTests.Config.ServiceDiscovery.QueryEndpoint)
   410  
   411  	if err != nil {
   412  		log.WithFields(logrus.Fields{
   413  			"prefix": "host-check-mgr",
   414  		}).Error("[HOST CHECKER MANAGER] Failed to retrieve host list: ", err)
   415  		return nil, err
   416  	}
   417  
   418  	// The returned data is a string, so lets unmarshal it:
   419  	checkTargets := make([]apidef.HostCheckObject, 0)
   420  	data0, _ := data.GetIndex(0)
   421  	if err := json.Unmarshal([]byte(data0), &checkTargets); err != nil {
   422  		log.WithFields(logrus.Fields{
   423  			"prefix": "host-check-mgr",
   424  		}).Error("[HOST CHECKER MANAGER] Decoder failed: ", err)
   425  		return nil, err
   426  	}
   427  
   428  	hostData := make([]HostData, len(checkTargets))
   429  	for i, target := range checkTargets {
   430  		newHostDoc, err := GlobalHostChecker.PrepareTrackingHost(target, spec.APIID)
   431  		if err != nil {
   432  			log.WithFields(logrus.Fields{
   433  				"prefix": "host-check-mgr",
   434  			}).Error("[HOST CHECKER MANAGER] failed to convert to HostData", err)
   435  		} else {
   436  			hostData[i] = newHostDoc
   437  		}
   438  	}
   439  	return hostData, nil
   440  }
   441  
   442  func (hc *HostCheckerManager) DoServiceDiscoveryListUpdateForID(apiID string) {
   443  	log.WithFields(logrus.Fields{
   444  		"prefix": "host-check-mgr",
   445  	}).Debug("[HOST CHECKER MANAGER] Getting data from service")
   446  	hostData, err := hc.ListFromService(apiID)
   447  	if err != nil {
   448  		return
   449  	}
   450  
   451  	log.WithFields(logrus.Fields{
   452  		"prefix": "host-check-mgr",
   453  	}).Debug("[HOST CHECKER MANAGER] Data was: \n", hostData)
   454  	log.WithFields(logrus.Fields{
   455  		"prefix": "host-check-mgr",
   456  	}).Info("[HOST CHECKER MANAGER] Refreshing uptime tests from service for API: ", apiID)
   457  	hc.UpdateTrackingListByAPIID(hostData, apiID)
   458  }
   459  
   460  // RecordHit will store an AnalyticsRecord in Redis
   461  func (hc *HostCheckerManager) RecordUptimeAnalytics(report HostHealthReport) error {
   462  	// If we are obfuscating API Keys, store the hashed representation (config check handled in hashing function)
   463  
   464  	spec := getApiSpec(report.MetaData[UnHealthyHostMetaDataAPIKey])
   465  	orgID := ""
   466  	if spec != nil {
   467  		orgID = spec.OrgID
   468  	}
   469  
   470  	t := time.Now()
   471  
   472  	var serverError bool
   473  	if report.ResponseCode > http.StatusOK {
   474  		serverError = true
   475  	}
   476  
   477  	newAnalyticsRecord := UptimeReportData{
   478  		URL:          report.CheckURL,
   479  		RequestTime:  int64(report.Latency),
   480  		ResponseCode: report.ResponseCode,
   481  		TCPError:     report.IsTCPError,
   482  		ServerError:  serverError,
   483  		Day:          t.Day(),
   484  		Month:        t.Month(),
   485  		Year:         t.Year(),
   486  		Hour:         t.Hour(),
   487  		Minute:       t.Minute(),
   488  		TimeStamp:    t,
   489  		APIID:        report.MetaData[UnHealthyHostMetaDataAPIKey],
   490  		OrgID:        orgID,
   491  	}
   492  
   493  	// For anlytics purposes, we need a code
   494  	if report.IsTCPError {
   495  		newAnalyticsRecord.ResponseCode = 521
   496  	}
   497  
   498  	newAnalyticsRecord.SetExpiry(spec.UptimeTests.Config.ExpireUptimeAnalyticsAfter)
   499  
   500  	encoded, err := msgpack.Marshal(newAnalyticsRecord)
   501  
   502  	if err != nil {
   503  		log.WithFields(logrus.Fields{
   504  			"prefix": "host-check-mgr",
   505  		}).Error("Error encoding uptime data:", err)
   506  		return err
   507  	}
   508  
   509  	log.WithFields(logrus.Fields{
   510  		"prefix": "host-check-mgr",
   511  	}).Debug("Recording uptime stat")
   512  	hc.store.AppendToSet(UptimeAnalytics_KEYNAME, string(encoded))
   513  	return nil
   514  }
   515  
   516  func InitHostCheckManager(store storage.Handler) {
   517  	// Already initialized
   518  	if GlobalHostChecker.Id != "" {
   519  		return
   520  	}
   521  
   522  	GlobalHostChecker = HostCheckerManager{}
   523  	GlobalHostChecker.Init(store)
   524  	GlobalHostChecker.Start()
   525  }
   526  
   527  func SetCheckerHostList() {
   528  	log.WithFields(logrus.Fields{
   529  		"prefix": "host-check-mgr",
   530  	}).Info("Loading uptime tests...")
   531  	hostList := []HostData{}
   532  	apisMu.RLock()
   533  	for _, spec := range apisByID {
   534  		if spec.UptimeTests.Config.ServiceDiscovery.UseDiscoveryService {
   535  			hostList, err := GlobalHostChecker.ListFromService(spec.APIID)
   536  			if err == nil {
   537  				hostList = append(hostList, hostList...)
   538  				for _, t := range hostList {
   539  					log.WithFields(logrus.Fields{
   540  						"prefix": "host-check-mgr",
   541  					}).WithFields(logrus.Fields{
   542  						"prefix": "host-check-mgr",
   543  					}).Info("---> Adding uptime test: ", t.CheckURL)
   544  				}
   545  			}
   546  		} else {
   547  			for _, checkItem := range spec.UptimeTests.CheckList {
   548  				newHostDoc, err := GlobalHostChecker.PrepareTrackingHost(checkItem, spec.APIID)
   549  				if err == nil {
   550  					hostList = append(hostList, newHostDoc)
   551  					log.WithFields(logrus.Fields{
   552  						"prefix": "host-check-mgr",
   553  					}).Info("---> Adding uptime test: ", checkItem.CheckURL)
   554  				} else {
   555  					log.WithFields(logrus.Fields{
   556  						"prefix": "host-check-mgr",
   557  					}).Warning("---> Adding uptime test failed: ", checkItem.CheckURL)
   558  					log.WithFields(logrus.Fields{
   559  						"prefix": "host-check-mgr",
   560  					}).Warning("--------> Error was: ", err)
   561  				}
   562  
   563  			}
   564  		}
   565  	}
   566  	apisMu.RUnlock()
   567  
   568  	GlobalHostChecker.UpdateTrackingList(hostList)
   569  }
   570  
   571  /*
   572  
   573  ## TEST CONFIGURATION
   574  
   575  uptime_tests: {
   576  	check_list: [
   577  	{
   578  		"url": "http://google.com:3000/"
   579  	},
   580  	{
   581  		"url": "`+testHttpPost+`",
   582  		"method": "POST",
   583  		"headers": {
   584  			"this": "that",
   585  			"more": "beans"
   586  		},
   587  		"body": "VEhJUyBJUyBBIEJPRFkgT0JKRUNUIFRFWFQNCg0KTW9yZSBzdHVmZiBoZXJl"
   588  	}
   589  	]
   590  },
   591  
   592  */