github.com/mier85/go-sensor@v1.30.1-0.20220920111756-9bf41b3bc7e0/fsm.go (about)

     1  // (c) Copyright IBM Corp. 2021
     2  // (c) Copyright Instana Inc. 2016
     3  
     4  package instana
     5  
     6  import (
     7  	"fmt"
     8  	"io/ioutil"
     9  	"math"
    10  	"net"
    11  	"os"
    12  	"path/filepath"
    13  	"runtime"
    14  	"strconv"
    15  	"time"
    16  
    17  	f "github.com/looplab/fsm"
    18  )
    19  
    20  const (
    21  	eInit     = "init"
    22  	eLookup   = "lookup"
    23  	eAnnounce = "announce"
    24  	eTest     = "test"
    25  
    26  	retryPeriod                = 30 * 1000 * time.Millisecond
    27  	exponentialRetryPeriodBase = 10 * 1000 * time.Millisecond
    28  	maximumRetries             = 3
    29  )
    30  
    31  type fsmAgent interface {
    32  	getHost() string
    33  	setHost(host string)
    34  	makeURL(prefix string) string
    35  	makeHostURL(host string, prefix string) string
    36  	applyHostAgentSettings(resp agentResponse)
    37  	requestHeader(url string, method string, header string) (string, error)
    38  	announceRequest(url string, method string, data interface{}, ret *agentResponse) (string, error)
    39  	head(url string) (string, error)
    40  }
    41  
    42  type fsmS struct {
    43  	name                       string
    44  	agent                      fsmAgent
    45  	fsm                        *f.FSM
    46  	timer                      *time.Timer
    47  	retriesLeft                int
    48  	expDelayFunc               func(retryNumber int) time.Duration
    49  	lookupAgentHostRetryPeriod time.Duration
    50  	logger                     LeveledLogger
    51  }
    52  
    53  func newFSM(agent fsmAgent, logger LeveledLogger) *fsmS {
    54  	logger.Warn("Stan is on the scene. Starting Instana instrumentation.")
    55  	logger.Debug("initializing fsm")
    56  
    57  	ret := &fsmS{
    58  		agent:                      agent,
    59  		retriesLeft:                maximumRetries,
    60  		expDelayFunc:               expDelay,
    61  		logger:                     logger,
    62  		lookupAgentHostRetryPeriod: retryPeriod,
    63  	}
    64  
    65  	ret.fsm = f.NewFSM(
    66  		"none",
    67  		f.Events{
    68  			{Name: eInit, Src: []string{"none", "unannounced", "announced", "ready"}, Dst: "init"},
    69  			{Name: eLookup, Src: []string{"init"}, Dst: "unannounced"},
    70  			{Name: eAnnounce, Src: []string{"unannounced"}, Dst: "announced"},
    71  			{Name: eTest, Src: []string{"announced"}, Dst: "ready"}},
    72  		f.Callbacks{
    73  			"init":              ret.lookupAgentHost,
    74  			"enter_unannounced": ret.announceSensor,
    75  			"enter_announced":   ret.testAgent,
    76  		})
    77  	ret.fsm.Event(eInit)
    78  
    79  	return ret
    80  }
    81  
    82  func (r *fsmS) scheduleRetry(e *f.Event, cb func(e *f.Event)) {
    83  	r.timer = time.NewTimer(r.lookupAgentHostRetryPeriod)
    84  	go func() {
    85  		<-r.timer.C
    86  		cb(e)
    87  	}()
    88  }
    89  
    90  func (r *fsmS) scheduleRetryWithExponentialDelay(e *f.Event, cb func(e *f.Event), retryNumber int) {
    91  	time.Sleep(r.expDelayFunc(retryNumber))
    92  	cb(e)
    93  }
    94  
    95  func (r *fsmS) lookupAgentHost(e *f.Event) {
    96  	go r.checkHost(e, r.agent.getHost())
    97  }
    98  
    99  func (r *fsmS) checkHost(e *f.Event, host string) {
   100  	r.logger.Debug("checking host ", host)
   101  	header, err := r.agent.requestHeader(r.agent.makeHostURL(host, "/"), "GET", "Server")
   102  
   103  	found := err == nil && header == agentHeader
   104  
   105  	// Agent host is found through the checkHost method, that attempts to read "Instana Agent" from the response header.
   106  	if found {
   107  		r.lookupSuccess(host)
   108  		return
   109  	}
   110  
   111  	if _, fileNotFoundErr := os.Stat("/proc/net/route"); fileNotFoundErr == nil {
   112  		gateway, err := getDefaultGateway("/proc/net/route")
   113  		if err != nil {
   114  			// This will be always the "failed to open /proc/net/route: no such file or directory" error.
   115  			// As this info is not relevant to the customer, we can remove it from the message.
   116  			r.logger.Error("Couldn't open the /proc/net/route file in order to retrieve the default gateway. Scheduling retry.")
   117  			r.scheduleRetry(e, r.lookupAgentHost)
   118  
   119  			return
   120  		}
   121  
   122  		if gateway == "" {
   123  			r.logger.Error("Couldn't parse the default gateway address from /proc/net/route. Scheduling retry.")
   124  			r.scheduleRetry(e, r.lookupAgentHost)
   125  
   126  			return
   127  		}
   128  
   129  		if found {
   130  			r.lookupSuccess(gateway)
   131  			return
   132  		}
   133  
   134  		r.logger.Error("Cannot connect to the agent through localhost or default gateway. Scheduling retry.")
   135  		r.scheduleRetry(e, r.lookupAgentHost)
   136  	} else {
   137  		r.logger.Error("Cannot connect to the agent. Scheduling retry.")
   138  		r.logger.Debug("Connecting through the default gateway has not been attempted because proc/net/route does not exist.")
   139  		r.scheduleRetry(e, r.lookupAgentHost)
   140  	}
   141  }
   142  
   143  func (r *fsmS) lookupSuccess(host string) {
   144  	r.logger.Debug("agent lookup success ", host)
   145  
   146  	r.agent.setHost(host)
   147  	r.retriesLeft = maximumRetries
   148  	r.fsm.Event(eLookup)
   149  }
   150  
   151  func (r *fsmS) announceSensor(e *f.Event) {
   152  	r.logger.Debug("announcing sensor to the agent")
   153  
   154  	go func() {
   155  		defer func() {
   156  			if err := recover(); err != nil {
   157  				r.logger.Debug("Announce recovered:", err)
   158  			}
   159  		}()
   160  
   161  		d := r.getDiscoveryS()
   162  
   163  		var resp agentResponse
   164  		_, err := r.agent.announceRequest(r.agent.makeURL(agentDiscoveryURL), "PUT", d, &resp)
   165  
   166  		if err != nil {
   167  			r.retriesLeft--
   168  			if r.retriesLeft == 0 {
   169  				r.logger.Error("Couldn't announce the sensor after reaching the maximum amount of attempts.")
   170  				r.fsm.Event(eInit)
   171  				return
   172  			} else {
   173  				r.logger.Debug("Cannot announce sensor. Scheduling retry.")
   174  			}
   175  
   176  			retryNumber := maximumRetries - r.retriesLeft + 1
   177  			r.scheduleRetryWithExponentialDelay(e, r.announceSensor, retryNumber)
   178  
   179  			return
   180  		}
   181  
   182  		r.logger.Info("Host agent available. We're in business. Announced pid:", resp.Pid)
   183  		r.agent.applyHostAgentSettings(resp)
   184  
   185  		r.retriesLeft = maximumRetries
   186  		r.fsm.Event(eAnnounce)
   187  
   188  	}()
   189  }
   190  
   191  func (r *fsmS) getDiscoveryS() *discoveryS {
   192  	pid := os.Getpid()
   193  	cpuSetFileContent := ""
   194  
   195  	if runtime.GOOS == "linux" {
   196  		cpuSetFileContent = r.cpuSetFileContent(pid)
   197  	}
   198  
   199  	d := &discoveryS{
   200  		PID:               pid,
   201  		CPUSetFileContent: cpuSetFileContent,
   202  		Name:              os.Args[0],
   203  		Args:              os.Args[1:],
   204  	}
   205  
   206  	if name, args, ok := getProcCommandLine(); ok {
   207  		r.logger.Debug("got cmdline from /proc: ", name)
   208  		d.Name, d.Args = name, args
   209  	} else {
   210  		r.logger.Debug("no /proc, using OS reported cmdline")
   211  	}
   212  
   213  	if _, err := os.Stat("/proc"); err == nil {
   214  		if addr, err := net.ResolveTCPAddr("tcp", r.agent.getHost()+":42699"); err == nil {
   215  			if tcpConn, err := net.DialTCP("tcp", nil, addr); err == nil {
   216  				defer tcpConn.Close()
   217  
   218  				file, err := tcpConn.File()
   219  
   220  				if err != nil {
   221  					r.logger.Error(err)
   222  				} else {
   223  					d.Fd = fmt.Sprintf("%v", file.Fd())
   224  
   225  					link := fmt.Sprintf("/proc/%d/fd/%d", os.Getpid(), file.Fd())
   226  					if _, err := os.Stat(link); err == nil {
   227  						d.Inode, _ = os.Readlink(link)
   228  					}
   229  				}
   230  			}
   231  		}
   232  	}
   233  
   234  	return d
   235  }
   236  
   237  func (r *fsmS) testAgent(e *f.Event) {
   238  	r.logger.Debug("testing communication with the agent")
   239  	go func() {
   240  		_, err := r.agent.head(r.agent.makeURL(agentDataURL))
   241  		b := err == nil
   242  
   243  		if b {
   244  			r.retriesLeft = maximumRetries
   245  			r.fsm.Event(eTest)
   246  		} else {
   247  			r.logger.Debug("Agent is not yet ready. Scheduling retry.")
   248  			r.retriesLeft--
   249  			if r.retriesLeft > 0 {
   250  				retryNumber := maximumRetries - r.retriesLeft + 1
   251  				r.scheduleRetryWithExponentialDelay(e, r.testAgent, retryNumber)
   252  			} else {
   253  				r.fsm.Event(eInit)
   254  			}
   255  		}
   256  	}()
   257  }
   258  
   259  func (r *fsmS) reset() {
   260  	r.retriesLeft = maximumRetries
   261  	r.fsm.Event(eInit)
   262  }
   263  
   264  func (r *fsmS) cpuSetFileContent(pid int) string {
   265  	path := filepath.Join("proc", strconv.Itoa(pid), "cpuset")
   266  	data, err := ioutil.ReadFile(path)
   267  	if err != nil {
   268  		r.logger.Info("error while reading ", path, ":", err.Error())
   269  		return ""
   270  	}
   271  
   272  	return string(data)
   273  }
   274  
   275  func expDelay(retryNumber int) time.Duration {
   276  	return time.Duration(math.Pow(2, float64(retryNumber-1))) * exponentialRetryPeriodBase
   277  }