github.com/mier85/go-sensor@v1.30.1-0.20220920111756-9bf41b3bc7e0/fsm.go (about) 1 // (c) Copyright IBM Corp. 2021 2 // (c) Copyright Instana Inc. 2016 3 4 package instana 5 6 import ( 7 "fmt" 8 "io/ioutil" 9 "math" 10 "net" 11 "os" 12 "path/filepath" 13 "runtime" 14 "strconv" 15 "time" 16 17 f "github.com/looplab/fsm" 18 ) 19 20 const ( 21 eInit = "init" 22 eLookup = "lookup" 23 eAnnounce = "announce" 24 eTest = "test" 25 26 retryPeriod = 30 * 1000 * time.Millisecond 27 exponentialRetryPeriodBase = 10 * 1000 * time.Millisecond 28 maximumRetries = 3 29 ) 30 31 type fsmAgent interface { 32 getHost() string 33 setHost(host string) 34 makeURL(prefix string) string 35 makeHostURL(host string, prefix string) string 36 applyHostAgentSettings(resp agentResponse) 37 requestHeader(url string, method string, header string) (string, error) 38 announceRequest(url string, method string, data interface{}, ret *agentResponse) (string, error) 39 head(url string) (string, error) 40 } 41 42 type fsmS struct { 43 name string 44 agent fsmAgent 45 fsm *f.FSM 46 timer *time.Timer 47 retriesLeft int 48 expDelayFunc func(retryNumber int) time.Duration 49 lookupAgentHostRetryPeriod time.Duration 50 logger LeveledLogger 51 } 52 53 func newFSM(agent fsmAgent, logger LeveledLogger) *fsmS { 54 logger.Warn("Stan is on the scene. Starting Instana instrumentation.") 55 logger.Debug("initializing fsm") 56 57 ret := &fsmS{ 58 agent: agent, 59 retriesLeft: maximumRetries, 60 expDelayFunc: expDelay, 61 logger: logger, 62 lookupAgentHostRetryPeriod: retryPeriod, 63 } 64 65 ret.fsm = f.NewFSM( 66 "none", 67 f.Events{ 68 {Name: eInit, Src: []string{"none", "unannounced", "announced", "ready"}, Dst: "init"}, 69 {Name: eLookup, Src: []string{"init"}, Dst: "unannounced"}, 70 {Name: eAnnounce, Src: []string{"unannounced"}, Dst: "announced"}, 71 {Name: eTest, Src: []string{"announced"}, Dst: "ready"}}, 72 f.Callbacks{ 73 "init": ret.lookupAgentHost, 74 "enter_unannounced": ret.announceSensor, 75 "enter_announced": ret.testAgent, 76 }) 77 ret.fsm.Event(eInit) 78 79 return ret 80 } 81 82 func (r *fsmS) scheduleRetry(e *f.Event, cb func(e *f.Event)) { 83 r.timer = time.NewTimer(r.lookupAgentHostRetryPeriod) 84 go func() { 85 <-r.timer.C 86 cb(e) 87 }() 88 } 89 90 func (r *fsmS) scheduleRetryWithExponentialDelay(e *f.Event, cb func(e *f.Event), retryNumber int) { 91 time.Sleep(r.expDelayFunc(retryNumber)) 92 cb(e) 93 } 94 95 func (r *fsmS) lookupAgentHost(e *f.Event) { 96 go r.checkHost(e, r.agent.getHost()) 97 } 98 99 func (r *fsmS) checkHost(e *f.Event, host string) { 100 r.logger.Debug("checking host ", host) 101 header, err := r.agent.requestHeader(r.agent.makeHostURL(host, "/"), "GET", "Server") 102 103 found := err == nil && header == agentHeader 104 105 // Agent host is found through the checkHost method, that attempts to read "Instana Agent" from the response header. 106 if found { 107 r.lookupSuccess(host) 108 return 109 } 110 111 if _, fileNotFoundErr := os.Stat("/proc/net/route"); fileNotFoundErr == nil { 112 gateway, err := getDefaultGateway("/proc/net/route") 113 if err != nil { 114 // This will be always the "failed to open /proc/net/route: no such file or directory" error. 115 // As this info is not relevant to the customer, we can remove it from the message. 116 r.logger.Error("Couldn't open the /proc/net/route file in order to retrieve the default gateway. Scheduling retry.") 117 r.scheduleRetry(e, r.lookupAgentHost) 118 119 return 120 } 121 122 if gateway == "" { 123 r.logger.Error("Couldn't parse the default gateway address from /proc/net/route. Scheduling retry.") 124 r.scheduleRetry(e, r.lookupAgentHost) 125 126 return 127 } 128 129 if found { 130 r.lookupSuccess(gateway) 131 return 132 } 133 134 r.logger.Error("Cannot connect to the agent through localhost or default gateway. Scheduling retry.") 135 r.scheduleRetry(e, r.lookupAgentHost) 136 } else { 137 r.logger.Error("Cannot connect to the agent. Scheduling retry.") 138 r.logger.Debug("Connecting through the default gateway has not been attempted because proc/net/route does not exist.") 139 r.scheduleRetry(e, r.lookupAgentHost) 140 } 141 } 142 143 func (r *fsmS) lookupSuccess(host string) { 144 r.logger.Debug("agent lookup success ", host) 145 146 r.agent.setHost(host) 147 r.retriesLeft = maximumRetries 148 r.fsm.Event(eLookup) 149 } 150 151 func (r *fsmS) announceSensor(e *f.Event) { 152 r.logger.Debug("announcing sensor to the agent") 153 154 go func() { 155 defer func() { 156 if err := recover(); err != nil { 157 r.logger.Debug("Announce recovered:", err) 158 } 159 }() 160 161 d := r.getDiscoveryS() 162 163 var resp agentResponse 164 _, err := r.agent.announceRequest(r.agent.makeURL(agentDiscoveryURL), "PUT", d, &resp) 165 166 if err != nil { 167 r.retriesLeft-- 168 if r.retriesLeft == 0 { 169 r.logger.Error("Couldn't announce the sensor after reaching the maximum amount of attempts.") 170 r.fsm.Event(eInit) 171 return 172 } else { 173 r.logger.Debug("Cannot announce sensor. Scheduling retry.") 174 } 175 176 retryNumber := maximumRetries - r.retriesLeft + 1 177 r.scheduleRetryWithExponentialDelay(e, r.announceSensor, retryNumber) 178 179 return 180 } 181 182 r.logger.Info("Host agent available. We're in business. Announced pid:", resp.Pid) 183 r.agent.applyHostAgentSettings(resp) 184 185 r.retriesLeft = maximumRetries 186 r.fsm.Event(eAnnounce) 187 188 }() 189 } 190 191 func (r *fsmS) getDiscoveryS() *discoveryS { 192 pid := os.Getpid() 193 cpuSetFileContent := "" 194 195 if runtime.GOOS == "linux" { 196 cpuSetFileContent = r.cpuSetFileContent(pid) 197 } 198 199 d := &discoveryS{ 200 PID: pid, 201 CPUSetFileContent: cpuSetFileContent, 202 Name: os.Args[0], 203 Args: os.Args[1:], 204 } 205 206 if name, args, ok := getProcCommandLine(); ok { 207 r.logger.Debug("got cmdline from /proc: ", name) 208 d.Name, d.Args = name, args 209 } else { 210 r.logger.Debug("no /proc, using OS reported cmdline") 211 } 212 213 if _, err := os.Stat("/proc"); err == nil { 214 if addr, err := net.ResolveTCPAddr("tcp", r.agent.getHost()+":42699"); err == nil { 215 if tcpConn, err := net.DialTCP("tcp", nil, addr); err == nil { 216 defer tcpConn.Close() 217 218 file, err := tcpConn.File() 219 220 if err != nil { 221 r.logger.Error(err) 222 } else { 223 d.Fd = fmt.Sprintf("%v", file.Fd()) 224 225 link := fmt.Sprintf("/proc/%d/fd/%d", os.Getpid(), file.Fd()) 226 if _, err := os.Stat(link); err == nil { 227 d.Inode, _ = os.Readlink(link) 228 } 229 } 230 } 231 } 232 } 233 234 return d 235 } 236 237 func (r *fsmS) testAgent(e *f.Event) { 238 r.logger.Debug("testing communication with the agent") 239 go func() { 240 _, err := r.agent.head(r.agent.makeURL(agentDataURL)) 241 b := err == nil 242 243 if b { 244 r.retriesLeft = maximumRetries 245 r.fsm.Event(eTest) 246 } else { 247 r.logger.Debug("Agent is not yet ready. Scheduling retry.") 248 r.retriesLeft-- 249 if r.retriesLeft > 0 { 250 retryNumber := maximumRetries - r.retriesLeft + 1 251 r.scheduleRetryWithExponentialDelay(e, r.testAgent, retryNumber) 252 } else { 253 r.fsm.Event(eInit) 254 } 255 } 256 }() 257 } 258 259 func (r *fsmS) reset() { 260 r.retriesLeft = maximumRetries 261 r.fsm.Event(eInit) 262 } 263 264 func (r *fsmS) cpuSetFileContent(pid int) string { 265 path := filepath.Join("proc", strconv.Itoa(pid), "cpuset") 266 data, err := ioutil.ReadFile(path) 267 if err != nil { 268 r.logger.Info("error while reading ", path, ":", err.Error()) 269 return "" 270 } 271 272 return string(data) 273 } 274 275 func expDelay(retryNumber int) time.Duration { 276 return time.Duration(math.Pow(2, float64(retryNumber-1))) * exponentialRetryPeriodBase 277 }