github.com/mholt/caddy-l4@v0.0.0-20241104153248-ec8fae209322/modules/l4proxy/healthchecks.go (about) 1 // Copyright 2020 Matthew Holt 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package l4proxy 16 17 import ( 18 "fmt" 19 "log" 20 "net" 21 "runtime/debug" 22 "time" 23 24 "github.com/caddyserver/caddy/v2" 25 "go.uber.org/zap" 26 ) 27 28 // HealthChecks configures active and passive health checks. 29 type HealthChecks struct { 30 // Active health checks run in the background on a timer. To 31 // minimally enable active health checks, set either path or 32 // port (or both). 33 Active *ActiveHealthChecks `json:"active,omitempty"` 34 35 // Passive health checks monitor proxied connections for errors or timeouts. 36 // To minimally enable passive health checks, specify at least an empty 37 // config object. 38 Passive *PassiveHealthChecks `json:"passive,omitempty"` 39 } 40 41 // ActiveHealthChecks holds configuration related to active health 42 // checks (that is, health checks which occur independently in a 43 // background goroutine). 44 type ActiveHealthChecks struct { 45 // The port to use (if different from the upstream's dial 46 // address) for health checks. 47 Port int `json:"port,omitempty"` 48 49 // How frequently to perform active health checks (default 30s). 50 Interval caddy.Duration `json:"interval,omitempty"` 51 52 // How long to wait for a connection to be established with 53 // peer before considering it unhealthy (default 5s). 54 Timeout caddy.Duration `json:"timeout,omitempty"` 55 56 logger *zap.Logger 57 } 58 59 // PassiveHealthChecks holds configuration related to passive 60 // health checks (that is, health checks which occur during 61 // the normal flow of connection proxying). 62 type PassiveHealthChecks struct { 63 // How long to remember a failed connection to a backend. A 64 // duration > 0 enables passive health checking. Default 0. 65 FailDuration caddy.Duration `json:"fail_duration,omitempty"` 66 67 // The number of failed connections within the FailDuration window to 68 // consider a backend as "down". Must be >= 1; default is 1. Requires 69 // that FailDuration be > 0. 70 MaxFails int `json:"max_fails,omitempty"` 71 72 // Limits the number of simultaneous connections to a backend by 73 // marking the backend as "down" if it has this many or more 74 // concurrent connections. 75 UnhealthyConnectionCount int `json:"unhealthy_connection_count,omitempty"` 76 77 logger *zap.Logger 78 } 79 80 // activeHealthChecker runs active health checks on a 81 // regular basis and blocks until 82 // h.HealthChecks.Active.stopChan is closed. 83 func (h *Handler) activeHealthChecker() { 84 defer func() { 85 if err := recover(); err != nil { 86 log.Printf("[PANIC] active health checks: %v\n%s", err, debug.Stack()) 87 } 88 }() 89 ticker := time.NewTicker(time.Duration(h.HealthChecks.Active.Interval)) 90 h.doActiveHealthCheckForAllHosts() 91 for { 92 select { 93 case <-ticker.C: 94 h.doActiveHealthCheckForAllHosts() 95 case <-h.ctx.Done(): 96 ticker.Stop() 97 return 98 } 99 } 100 } 101 102 // doActiveHealthCheckForAllHosts immediately performs a 103 // health checks for all upstream hosts configured by h. 104 func (h *Handler) doActiveHealthCheckForAllHosts() { 105 for _, upstream := range h.Upstreams { 106 go func(upstream *Upstream) { 107 defer func() { 108 if err := recover(); err != nil { 109 log.Printf("[PANIC] active health check: %v\n%s", err, debug.Stack()) 110 } 111 }() 112 113 for _, p := range upstream.peers { 114 err := h.doActiveHealthCheck(p) 115 if err != nil { 116 h.HealthChecks.Active.logger.Error("active health check failed", 117 zap.String("peer", p.address.String()), 118 zap.Error(err)) 119 } 120 } 121 }(upstream) 122 } 123 } 124 125 // doActiveHealthCheck performs a health check to host which 126 // can be reached at address hostAddr. The health status of 127 // the host will be updated according to whether it passes 128 // the health check. An error is returned only if the health 129 // check fails to occur or if marking the host's health status 130 // fails. 131 func (h *Handler) doActiveHealthCheck(p *peer) error { 132 addr := p.address 133 134 // adjust the port, if configured to be different 135 if h.HealthChecks.Active.Port > 0 { 136 addr.StartPort = uint(h.HealthChecks.Active.Port) 137 addr.EndPort = addr.StartPort 138 } 139 140 hostPort := addr.JoinHostPort(0) 141 timeout := time.Duration(h.HealthChecks.Active.Timeout) 142 143 conn, err := net.DialTimeout(addr.Network, hostPort, timeout) 144 if err != nil { 145 h.HealthChecks.Active.logger.Info("host is down", 146 zap.String("address", addr.String()), 147 zap.Duration("timeout", timeout), 148 zap.Error(err)) 149 _, err2 := p.setHealthy(false) 150 if err2 != nil { 151 return fmt.Errorf("marking unhealthy: %v (original error: %v)", err2, err) 152 } 153 return nil 154 } 155 _ = conn.Close() 156 157 // connection succeeded, so mark as healthy 158 swapped, err := p.setHealthy(true) 159 if swapped { 160 h.HealthChecks.Active.logger.Info("host is up", zap.String("address", addr.String())) 161 } 162 if err != nil { 163 return fmt.Errorf("marking healthy: %v", err) 164 } 165 166 return nil 167 }