github.com/songzhibin97/gkit@v1.2.13/watching/watching.go

package watching

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
	"sync/atomic"
	"time"

	"github.com/songzhibin97/gkit/options"
)

type Watching struct {
	config *configs

	// stats
	changeLog                int32
	collectCount             int
	gcCycleCount             int
	threadTriggerCount       int
	cpuTriggerCount          int
	memTriggerCount          int
	grTriggerCount           int
	gcHeapTriggerCount       int
	shrinkThreadTriggerCount int

	// cooldown
	threadCoolDownTime    time.Time
	cpuCoolDownTime       time.Time
	memCoolDownTime       time.Time
	gcHeapCoolDownTime    time.Time
	grCoolDownTime        time.Time
	shrinkThrCoolDownTime time.Time

	// GC heap triggered, need to dump again next time.
	gcHeapTriggered bool

	// stats ring
	memStats    ring
	cpuStats    ring
	grNumStats  ring
	threadStats ring
	gcHeapStats ring

	// switch
	stopped int64

	// the mutex protects the fields below
	sync.Mutex
	// channel for GC sweep finalizer events
	gcEventsCh chan struct{}
	// profiler reporter channel
	rptEventsCh chan rptEvent
}

// rptEvent holds the arguments of a report event.
type rptEvent struct {
	PType   string
	Buf     []byte
	Reason  string
	EventID string
}

// EnableThreadDump enables the thread dump.
func (w *Watching) EnableThreadDump() *Watching {
	w.config.ThreadConfigs.Enable = true
	return w
}

// DisableThreadDump disables the thread dump.
func (w *Watching) DisableThreadDump() *Watching {
	w.config.ThreadConfigs.Enable = false
	return w
}

// EnableGoroutineDump enables the goroutine dump.
func (w *Watching) EnableGoroutineDump() *Watching {
	w.config.GroupConfigs.Enable = true
	return w
}

// DisableGoroutineDump disables the goroutine dump.
func (w *Watching) DisableGoroutineDump() *Watching {
	w.config.GroupConfigs.Enable = false
	return w
}

// EnableCPUDump enables the CPU dump.
func (w *Watching) EnableCPUDump() *Watching {
	w.config.CpuConfigs.Enable = true
	return w
}

// DisableCPUDump disables the CPU dump.
func (w *Watching) DisableCPUDump() *Watching {
	w.config.CpuConfigs.Enable = false
	return w
}

// EnableMemDump enables the mem dump.
func (w *Watching) EnableMemDump() *Watching {
	w.config.MemConfigs.Enable = true
	return w
}

// DisableMemDump disables the mem dump.
func (w *Watching) DisableMemDump() *Watching {
	w.config.MemConfigs.Enable = false
	return w
}

// EnableGCHeapDump enables the GC heap dump.
func (w *Watching) EnableGCHeapDump() *Watching {
	w.config.GCHeapConfigs.Enable = true
	return w
}

// DisableGCHeapDump disables the GC heap dump.
func (w *Watching) DisableGCHeapDump() *Watching {
	w.config.GCHeapConfigs.Enable = false
	return w
}

// EnableProfileReporter enables the reporter for dumped profiles.
func (w *Watching) EnableProfileReporter() {
	if w.config.rptConfigs.reporter == nil {
		w.logf("failed to enable profile reporter: reporter is empty")
		return
	}
	atomic.StoreInt32(&w.config.rptConfigs.active, 1)
}

// DisableProfileReporter disables the reporter for dumped profiles.
func (w *Watching) DisableProfileReporter() {
	atomic.StoreInt32(&w.config.rptConfigs.active, 0)
}
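
// logReporter is an editor's sketch, not part of the original package: a
// minimal reporter implementation for illustration only. Its Report
// signature is inferred from the call site in startReporter below; the
// concrete interface the config expects is defined elsewhere in this package.
type logReporter struct{}

// Report just logs the metadata of a dumped profile instead of shipping it anywhere.
func (logReporter) Report(pType string, buf []byte, reason string, eventID string) error {
	fmt.Printf("[Watching] profile %s (%d bytes), reason: %s, event: %s\n", pType, len(buf), reason, eventID)
	return nil
}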

func finalizerCallback(gc *gcHeapFinalizer) {
	defer func() {
		if r := recover(); r != nil {
			gc.w.logf("Panic in finalizer callback: %v", r)
		}
	}()
	// if Watching is disabled or stopped, return and let the GC clean up normally
	if atomic.LoadInt64(&gc.w.stopped) == 1 {
		return
	}

	// register the finalizer again, so it fires on the next GC cycle as well
	runtime.SetFinalizer(gc, finalizerCallback)

	ch := gc.w.gcEventsCh
	if ch == nil {
		return
	}

	select {
	case ch <- struct{}{}:
	default:
		gc.w.logf("cannot send event to finalizer channel immediately; is the analyzer blocked?")
	}
}

type gcHeapFinalizer struct {
	w *Watching
}

func (w *Watching) startGCCycleLoop(ch chan struct{}) {
	w.gcHeapStats = newRing(minCollectCyclesBeforeDumpStart)

	gc := &gcHeapFinalizer{
		w,
	}

	runtime.SetFinalizer(gc, finalizerCallback)

	go gc.w.gcHeapCheckLoop(ch)
}
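
// watchGCCycles is an editor's sketch, not part of the original package,
// distilling the trick used by startGCCycleLoop/finalizerCallback above: a
// finalizer that re-registers itself on its own object is resurrected on
// every collection, so it fires roughly once per GC cycle and gives a cheap
// GC-cycle notification without polling.
func watchGCCycles(ch chan<- struct{}) {
	var cb func(*int)
	cb = func(p *int) {
		select {
		case ch <- struct{}{}:
		default: // never block inside a finalizer
		}
		runtime.SetFinalizer(p, cb) // resurrect: fire again on the next cycle
	}
	p := new(int)
	runtime.SetFinalizer(p, cb)
}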

// Start starts the dump loop of Watching.
func (w *Watching) Start() {
	if !atomic.CompareAndSwapInt64(&w.stopped, 1, 0) {
		w.logf("Watching has started, please don't start it again.")
		return
	}

	w.Lock()
	defer w.Unlock()

	gcEventsCh := make(chan struct{}, 1)
	rptCh := make(chan rptEvent, 32)
	w.gcEventsCh = gcEventsCh
	w.rptEventsCh = rptCh

	w.initEnvironment()
	go w.startDumpLoop()
	go w.startReporter(rptCh)
	w.startGCCycleLoop(gcEventsCh)
}

// Stop the dump loop.
func (w *Watching) Stop() {
	if !atomic.CompareAndSwapInt64(&w.stopped, 0, 1) {
		//nolint
		fmt.Println("Watching has stopped, please don't stop it again.")
		return
	}

	w.Lock()
	defer w.Unlock()

	// closing the channels ends gcHeapCheckLoop and the reporter goroutine,
	// both of which range over them
	if gcEventsCh := w.gcEventsCh; gcEventsCh != nil {
		w.gcEventsCh = nil
		close(gcEventsCh)
	}
	if rptEventsCh := w.rptEventsCh; rptEventsCh != nil {
		w.rptEventsCh = nil
		close(rptEventsCh)
	}
}

func (w *Watching) startDumpLoop() {
	// init previous cool down time
	now := time.Now()
	w.cpuCoolDownTime = now
	w.memCoolDownTime = now
	w.grCoolDownTime = now

	// init stats ring
	w.cpuStats = newRing(minCollectCyclesBeforeDumpStart)
	w.memStats = newRing(minCollectCyclesBeforeDumpStart)
	w.grNumStats = newRing(minCollectCyclesBeforeDumpStart)
	w.threadStats = newRing(minCollectCyclesBeforeDumpStart)

	// dump loop
	ticker := time.NewTicker(w.config.CollectInterval)
	// wrap Stop in a closure so that it stops the current ticker even after
	// the variable has been reassigned below
	defer func() { ticker.Stop() }()

	for {
		select {
		case <-w.config.intervalResetting:
			// once the minimum supported Go version is 1.15 we can call
			// Ticker.Reset directly here, see pkg.go.dev/time#Ticker.Reset.
			// we can't use `for range ticker.C` either, because the range
			// loop caches the channel it ranges over, so it couldn't be
			// swapped for the new ticker's channel
			itv := w.config.CollectInterval
			fmt.Printf("[Watching] collect interval is resetting to [%v]\n", itv) //nolint:forbidigo
			ticker.Stop() // release the old ticker before replacing it
			ticker = time.NewTicker(itv)
		default:
			<-ticker.C
			if atomic.LoadInt64(&w.stopped) == 1 {
				fmt.Println("[Watching] dump loop stopped")
				return
			}
			cpuCore, err := w.getCPUCore()
			if cpuCore == 0 || err != nil {
				w.logf("[Watching] get CPU core failed, CPU core: %v, error: %v", cpuCore, err)
				return
			}
			memoryLimit, err := w.getMemoryLimit()
			if memoryLimit == 0 || err != nil {
				w.logf("[Watching] get memory limit failed, memory limit: %v, error: %v", memoryLimit, err)
				return
			}
			cpu, mem, gNum, tNum, err := collect(cpuCore, memoryLimit)
			if err != nil {
				w.logf("%s", err.Error())

				continue
			}

			w.cpuStats.push(cpu)
			w.memStats.push(mem)
			w.grNumStats.push(gNum)
			w.threadStats.push(tNum)

			w.collectCount++

			if w.collectCount < minCollectCyclesBeforeDumpStart {
				// collect at least a few cycles before starting to judge and dump
				w.logf("[Watching] warming up cycle : %d", w.collectCount)
				continue
			}
			if err := w.EnableDump(cpu); err != nil {
				w.logf("[Watching] unable to dump: %v", err)
				continue
			}

			w.goroutineCheckAndDump(gNum)
			w.memCheckAndDump(mem)
			w.cpuCheckAndDump(cpu)
			w.threadCheckAndDump(tNum)
			w.threadCheckAndShrink(tNum)
		}
	}
}

// startReporter starts a background goroutine that consumes the event
// channel; it finishes when the channel is closed by Stop.
func (w *Watching) startReporter(ch chan rptEvent) {
	go func() {
		for event := range ch {
			config := w.config.GetReporterConfigs()
			if config.reporter == nil {
				w.logf("reporter is nil, please initialize it before calling startReporter")
				// drop the event
				continue
			}

			err := w.config.rptConfigs.reporter.Report(event.PType, event.Buf, event.Reason, event.EventID)
			if err != nil {
				w.logf("reporter err: %v", err)
			}
		}
	}()
}

// goroutine start.
func (w *Watching) goroutineCheckAndDump(gNum int) {
	// get a copy instead of locking it
	grConfigs := w.config.GetGroupConfigs()

	if !grConfigs.Enable {
		return
	}

	if w.grCoolDownTime.After(time.Now()) {
		w.logf("[Watching] goroutine dump is in cooldown")
		return
	}

	if triggered := w.goroutineProfile(gNum, grConfigs); triggered {
		w.grCoolDownTime = time.Now().Add(w.config.CoolDown)
		w.grTriggerCount++
	}
}

func (w *Watching) goroutineProfile(gNum int, c groupConfigs) bool {
	match, reason := matchRule(w.grNumStats, gNum, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, c.GoroutineTriggerNumMax)
	if !match {
		w.debugf(UniformLogFormat, "NODUMP", type2name[goroutine],
			c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
			c.GoroutineTriggerNumMax, w.grNumStats.data, gNum)
		return false
	}
	w.logf(UniformLogFormat, "pprof", type2name[goroutine],
		c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
		c.GoroutineTriggerNumMax, w.grNumStats.data, gNum)

	var buf bytes.Buffer
	_ = pprof.Lookup("goroutine").WriteTo(&buf, int(w.config.DumpProfileType)) // nolint: errcheck
	w.writeGrProfileDataToFile(buf, c, goroutine, gNum)

	w.reportProfile(type2name[goroutine], buf.Bytes(), reason, "")
	return true
}

// memory start.
func (w *Watching) memCheckAndDump(mem int) {
	memConfig := w.config.GetMemConfigs()

	if !memConfig.Enable {
		return
	}

	if w.memCoolDownTime.After(time.Now()) {
		w.logf("[Watching] mem dump is in cooldown")
		return
	}

	if triggered := w.memProfile(mem, memConfig); triggered {
		w.memCoolDownTime = time.Now().Add(w.config.CoolDown)
		w.memTriggerCount++
	}
}

func (w *Watching) memProfile(rss int, c typeConfig) bool {
	match, reason := matchRule(w.memStats, rss, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, NotSupportTypeMaxConfig)
	if !match {
		// let user know why this should not dump
		w.debugf(UniformLogFormat, "NODUMP", type2name[mem],
			c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
			w.memStats.data, rss)

		return false
	}

	w.logf(UniformLogFormat, "pprof", type2name[mem],
		c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
		w.memStats.data, rss)

	var buf bytes.Buffer
	_ = pprof.Lookup("heap").WriteTo(&buf, int(w.config.DumpProfileType)) // nolint: errcheck
	w.writeProfileDataToFile(buf, c, mem, rss, w.memStats, "")

	w.reportProfile(type2name[mem], buf.Bytes(), reason, "")
	return true
}

func (w *Watching) threadCheckAndShrink(threadNum int) {
	shrink := w.config.ShrinkThrConfigs

	if shrink == nil || !shrink.Enable {
		return
	}

	if w.shrinkThrCoolDownTime.After(time.Now()) {
		return
	}

	if threadNum > shrink.Threshold {
		// by default use 100x the delay as the cooldown,
		// clamped to between one hour and 24 hours
		delay := shrink.Delay * 100
		if delay < time.Hour {
			delay = time.Hour
		}
		if delay > time.Hour*24 {
			delay = time.Hour * 24
		}

		w.shrinkThrCoolDownTime = time.Now().Add(delay)

		w.logf("current thread number(%v) larger than threshold(%v), will start to shrink thread after %v", threadNum, shrink.Threshold, shrink.Delay)
		time.AfterFunc(shrink.Delay, func() {
			w.startShrinkThread()
		})
	}
}

// TODO: better to only shrink the threads that are idle.
func (w *Watching) startShrinkThread() {
	c := w.config.GetShrinkThreadConfigs()
	curThreadNum := getThreadNum()
	n := curThreadNum - c.Threshold

	// check again after the timer triggered
	if c.Enable && n > 0 {
		w.shrinkThreadTriggerCount++
		w.logf("start to shrink %v threads, now: %v", n, curThreadNum)

		var wg sync.WaitGroup
		wg.Add(n)
		for i := 0; i < n; i++ {
			// avoid closing too many threads in one batch
			time.Sleep(time.Millisecond * 100)

			go func() {
				defer wg.Done()
				// a goroutine that exits while locked to its OS thread, without
				// calling UnlockOSThread, makes the runtime terminate that thread
				runtime.LockOSThread()
			}()
		}
		wg.Wait()

		w.logf("finished shrink threads, now: %v", getThreadNum())
	}
}
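
// threadCount is an editor's sketch, not part of the original package: one
// way to observe the process's OS thread count from Go, via the threadcreate
// profile's event count. The package's own getThreadNum is defined elsewhere
// and may be implemented differently.
func threadCount() int {
	return pprof.Lookup("threadcreate").Count()
}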

// thread start.
func (w *Watching) threadCheckAndDump(threadNum int) {
	threadConfig := w.config.GetThreadConfigs()

	if !threadConfig.Enable {
		return
	}

	if w.threadCoolDownTime.After(time.Now()) {
		w.logf("[Watching] thread dump is in cooldown")
		return
	}

	if triggered := w.threadProfile(threadNum, threadConfig); triggered {
		w.threadCoolDownTime = time.Now().Add(w.config.CoolDown)
		w.threadTriggerCount++
	}
}

func (w *Watching) threadProfile(curThreadNum int, c typeConfig) bool {
	match, reason := matchRule(w.threadStats, curThreadNum, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, NotSupportTypeMaxConfig)
	if !match {
		// let user know why this should not dump
		w.debugf(UniformLogFormat, "NODUMP", type2name[thread],
			c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
			w.threadStats.data, curThreadNum)

		return false
	}

	w.logf(UniformLogFormat, "pprof", type2name[thread],
		c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
		NotSupportTypeMaxConfig, w.threadStats.data, curThreadNum)

	eventID := fmt.Sprintf("thr-%d", w.threadTriggerCount)

	var buf bytes.Buffer
	_ = pprof.Lookup("threadcreate").WriteTo(&buf, int(w.config.DumpProfileType)) // nolint: errcheck
	w.writeProfileDataToFile(buf, c, thread, curThreadNum, w.threadStats, eventID)

	w.reportProfile(type2name[thread], buf.Bytes(), reason, eventID)

	buf.Reset()
	_ = pprof.Lookup("goroutine").WriteTo(&buf, int(w.config.DumpProfileType)) // nolint: errcheck
	w.writeProfileDataToFile(buf, c, goroutine, curThreadNum, w.threadStats, eventID)

	w.reportProfile("goroutine", buf.Bytes(), reason, eventID)
	return true
}

func (w *Watching) reportProfile(pType string, buf []byte, reason string, eventID string) {
	defer func() {
		if r := recover(); r != nil {
			w.logf("Panic during report profile: %v", r)
		}
	}()

	if atomic.LoadInt64(&w.stopped) == 1 {
		return
	}
	conf := w.config.GetReporterConfigs()
	if conf.active == 0 {
		return
	}
	ch := w.rptEventsCh
	select {
	// attempt a non-blocking send
	case ch <- rptEvent{
		pType,
		buf,
		reason,
		eventID,
	}:
	default:
		w.logf("reporter channel is full, will ignore it")
	}
}

// cpu start.
func (w *Watching) cpuCheckAndDump(cpu int) {
	cpuConfig := w.config.GetCPUConfigs()
	if !cpuConfig.Enable {
		return
	}

	if w.cpuCoolDownTime.After(time.Now()) {
		w.logf("[Watching] cpu dump is in cooldown")
		return
	}

	if triggered := w.cpuProfile(cpu, cpuConfig); triggered {
		w.cpuCoolDownTime = time.Now().Add(w.config.CoolDown)
		w.cpuTriggerCount++
	}
}

func (w *Watching) cpuProfile(curCPUUsage int, c typeConfig) bool {
	match, reason := matchRule(w.cpuStats, curCPUUsage, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, NotSupportTypeMaxConfig)
	if !match {
		// let user know why this should not dump
		w.debugf(UniformLogFormat, "NODUMP", type2name[cpu],
			c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
			w.cpuStats.data, curCPUUsage)

		return false
	}

	w.logf(UniformLogFormat, "pprof dump", type2name[cpu],
		c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
		w.cpuStats.data, curCPUUsage)

	bf, binFileName, err := getBinaryFileNameAndCreate(w.config.DumpPath, cpu, "")
	if err != nil {
		w.logf("[Watching] failed to create cpu profile file: %v", err.Error())
		return false
	}
	defer bf.Close()

	err = pprof.StartCPUProfile(bf)
	if err != nil {
		w.logf("[Watching] failed to profile cpu: %v", err.Error())
		return false
	}

	time.Sleep(defaultCPUSamplingTime)
	pprof.StopCPUProfile()

	w.logf(UniformLogFormat, "pprof dump to log dir", type2name[cpu],
		c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
		w.cpuStats.data, curCPUUsage)

	if conf := w.config.GetReporterConfigs(); conf.active == 1 {
		bfCpy, err := ioutil.ReadFile(binFileName)
		if err != nil {
			w.logf("failed to read the cpu profile file back for reporting, err: %v", err)
			return true
		}
		w.reportProfile(type2name[cpu], bfCpy, reason, "")
	}
	return true
}

func (w *Watching) gcHeapCheckLoop(ch chan struct{}) {
	for range ch {
		w.gcHeapCheckAndDump()
	}
}

func (w *Watching) gcHeapCheckAndDump() {
	gcHeapConfig := w.config.GetGcHeapConfigs()
	if !gcHeapConfig.Enable || atomic.LoadInt64(&w.stopped) == 1 {
		return
	}

	memStats := new(runtime.MemStats)
	runtime.ReadMemStats(memStats)

	// TODO: we can only use NextGC for now since the runtime doesn't expose
	// heapMarked yet, and we hard-code gcPercent as 100 here.
	// Maybe introduce a new API like debug.GCHeapMarked? It would also have
	// better performance (no STW).
	nextGC := memStats.NextGC
	prevGC := nextGC / 2 //nolint:gomnd

	memoryLimit, err := w.getMemoryLimit()
	if memoryLimit == 0 || err != nil {
		w.logf("[Watching] get memory limit failed, memory limit: %v, error: %v", memoryLimit, err)
		return
	}

	ratio := int(100 * float64(prevGC) / float64(memoryLimit))
	w.gcHeapStats.push(ratio)

	w.gcCycleCount++
	if w.gcCycleCount < minCollectCyclesBeforeDumpStart {
		// collect at least a few cycles before starting to judge and dump
		w.logf("[Watching] GC cycle warming up : %d", w.gcCycleCount)
		return
	}

	if w.gcHeapCoolDownTime.After(time.Now()) {
		w.logf("[Watching] GC heap dump is in cooldown")
		return
	}

	if triggered := w.gcHeapProfile(ratio, w.gcHeapTriggered, gcHeapConfig); triggered {
		if w.gcHeapTriggered {
			// already dumped twice, mark it false
			w.gcHeapTriggered = false
			w.gcHeapCoolDownTime = time.Now().Add(w.config.CoolDown)
			w.gcHeapTriggerCount++
		} else {
			// force dump next time
			w.gcHeapTriggered = true
		}
	}
}
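
// An editor's note on the ratio above: with GOGC=100 the runtime sets NextGC
// to roughly twice the live (marked) heap of the previous cycle, so
// prevGC = NextGC/2 approximates the live heap. For example, with
// NextGC = 400 MiB and a 2048 MiB memory limit:
//
//	prevGC = 400 / 2 = 200 MiB
//	ratio  = int(100 * 200 / 2048) = 9 (percent of the limit)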

func (w *Watching) getCPUCore() (float64, error) {
	if w.config.cpuCore > 0 {
		return w.config.cpuCore, nil
	}

	if w.config.UseGoProcAsCPUCore {
		return float64(runtime.GOMAXPROCS(-1)), nil
	}

	if w.config.UseCGroup {
		return getCGroupCPUCore()
	}

	return float64(runtime.NumCPU()), nil
}
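
// An editor's note on getCGroupCPUCore (defined elsewhere in this package):
// under cgroup v1 the effective core count is conventionally derived from
// the CFS quota, along the lines of
//
//	cores = cpu.cfs_quota_us / cpu.cfs_period_us
//
// so a container capped at "2.5 CPUs" yields 2.5 here, which keeps the CPU
// usage percentages meaningful inside containers. This is an assumption
// about the helper, not a guarantee of its implementation.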

// gcHeapProfile will dump the profile twice per trigger, since the current
// memory profile is only fully merged after the next GC cycle. We assume the
// finalizer will be called before the next GC cycle (it usually will be).
func (w *Watching) gcHeapProfile(gc int, force bool, c typeConfig) bool {
	match, reason := matchRule(w.gcHeapStats, gc, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, NotSupportTypeMaxConfig)
	if !force && !match {
		// let user know why this should not dump
		w.debugf(UniformLogFormat, "NODUMP", type2name[gcHeap],
			c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
			w.gcHeapStats.data, gc)

		return false
	}

	w.logf(UniformLogFormat, "pprof", type2name[gcHeap],
		c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
		NotSupportTypeMaxConfig, w.gcHeapStats.data, gc)
	// gcHeapTriggerCount is only increased after both profiles have been collected
	eventID := fmt.Sprintf("heap-%d", w.gcHeapTriggerCount)

	var buf bytes.Buffer
	_ = pprof.Lookup("heap").WriteTo(&buf, int(w.config.DumpProfileType)) // nolint: errcheck
	w.writeProfileDataToFile(buf, c, gcHeap, gc, w.gcHeapStats, eventID)

	w.reportProfile(type2name[gcHeap], buf.Bytes(), reason, eventID)

	return true
}

func (w *Watching) initEnvironment() {
	// choose whether the max memory is limited by cgroup
	if w.config.UseCGroup {
		w.logf("[Watching] use cgroup to limit memory")
	} else {
		w.logf("[Watching] use the default memory percent calculated by gopsutil")
	}
	// log rotation makes no sense when logging to stdout
	if w.config.Logger == os.Stdout && w.config.logConfigs.RotateEnable {
		w.config.logConfigs.RotateEnable = false
	}
}

// EnableDump reports whether dumping is currently allowed: it returns an
// error when the current CPU usage exceeds the configured CPUMaxPercent.
func (w *Watching) EnableDump(curCPU int) (err error) {
	if w.config.CPUMaxPercent != 0 && curCPU >= w.config.CPUMaxPercent {
		return fmt.Errorf("current cpu percent [%v] is greater than the CPUMaxPercent [%v]", curCPU, w.config.CPUMaxPercent)
	}
	return nil
}

func (w *Watching) writeGrProfileDataToFile(data bytes.Buffer, config groupConfigs, dumpType configureType, currentStat int) {
	w.logf(UniformLogFormat, "pprof", type2name[dumpType],
		config.TriggerMin, config.TriggerDiff, config.TriggerAbs,
		config.GoroutineTriggerNumMax,
		w.grNumStats.data, currentStat)

	if err := writeFile(data, dumpType, w.config.DumpConfigs, ""); err != nil {
		w.logf("%s", err.Error())
	}
}

func (w *Watching) writeProfileDataToFile(data bytes.Buffer, opts typeConfig, dumpType configureType, currentStat int, ringStats ring, eventID string) {
	w.logf(UniformLogFormat, "pprof", type2name[dumpType],
		opts.TriggerMin, opts.TriggerDiff, opts.TriggerAbs,
		NotSupportTypeMaxConfig, ringStats.data, currentStat)

	if err := writeFile(data, dumpType, w.config.DumpConfigs, eventID); err != nil {
		w.logf("%s", err.Error())
	}
}

func (w *Watching) getMemoryLimit() (uint64, error) {
	if w.config.memoryLimit > 0 {
		return w.config.memoryLimit, nil
	}

	if w.config.UseCGroup {
		return getCGroupMemoryLimit()
	}
	return getNormalMemoryLimit()
}

// NewWatching returns a Watching instance in the stopped state; call Start
// to begin the dump loops.
func NewWatching(opts ...options.Option) *Watching {
	watching := &Watching{config: defaultConfig(), stopped: 1}
	for _, opt := range opts {
		opt(watching)
	}
	return watching
}
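
// exampleLifecycle is an editor's sketch, not part of the original package,
// showing the intended lifecycle using only the API defined in this file.
// Functional options from the options package are omitted, since their
// constructors live elsewhere.
func exampleLifecycle() {
	w := NewWatching().
		EnableCPUDump().
		EnableMemDump().
		EnableGoroutineDump()

	w.Start()
	defer w.Stop()

	// ... run the application; Watching samples and dumps in the background.
	time.Sleep(10 * time.Second)
}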