github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/hk/housekeeper.go (about)

// Package hk provides a mechanism for registering cleanup
// functions which are invoked at specified intervals.
     3  /*
     4   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package hk
     7  
     8  import (
     9  	"container/heap"
    10  	"os"
    11  	"os/signal"
    12  	"syscall"
    13  	"time"
    14  
    15  	"github.com/NVIDIA/aistore/cmn/atomic"
    16  	"github.com/NVIDIA/aistore/cmn/cos"
    17  	"github.com/NVIDIA/aistore/cmn/debug"
    18  	"github.com/NVIDIA/aistore/cmn/mono"
    19  	"github.com/NVIDIA/aistore/cmn/nlog"
    20  )
    21  
    22  const NameSuffix = ".gc" // reg name suffix
    23  
    24  const (
    25  	DayInterval   = 24 * time.Hour
    26  	UnregInterval = 365 * DayInterval // to unregister upon return from the callback
    27  )
    28  
    29  type (
    30  	hkcb    func() time.Duration
    31  	request struct {
    32  		f               hkcb
    33  		name            string
    34  		initialInterval time.Duration
    35  		registering     bool
    36  	}
    37  	timedAction struct {
    38  		f          hkcb
    39  		name       string
    40  		updateTime int64
    41  	}
    42  	timedActions []timedAction
    43  
    44  	housekeeper struct {
    45  		stopCh  cos.StopCh
    46  		sigCh   chan os.Signal
    47  		actions *timedActions
    48  		timer   *time.Timer
    49  		workCh  chan request
    50  		running atomic.Bool
    51  	}
    52  )
    53  
    54  var DefaultHK *housekeeper
    55  
    56  // interface guard
    57  var _ cos.Runner = (*housekeeper)(nil)
    58  
    59  func TestInit() {
    60  	_init(false)
    61  }
    62  
    63  func Init() {
    64  	_init(true)
    65  }
    66  
    67  func _init(mustRun bool) {
    68  	DefaultHK = &housekeeper{
    69  		workCh:  make(chan request, 512),
    70  		sigCh:   make(chan os.Signal, 1),
    71  		actions: &timedActions{},
    72  	}
    73  	DefaultHK.stopCh.Init()
    74  	if mustRun {
    75  		DefaultHK.running.Store(false)
    76  	} else {
    77  		DefaultHK.running.Store(true) // tests only
    78  	}
    79  	heap.Init(DefaultHK.actions)
    80  }
    81  
    82  //////////////////
    83  // timedActions //
    84  //////////////////
    85  
    86  func (tc timedActions) Len() int           { return len(tc) }
    87  func (tc timedActions) Less(i, j int) bool { return tc[i].updateTime < tc[j].updateTime }
    88  func (tc timedActions) Swap(i, j int)      { tc[i], tc[j] = tc[j], tc[i] }
    89  func (tc timedActions) Peek() *timedAction { return &tc[0] }
    90  func (tc *timedActions) Push(x any)        { *tc = append(*tc, x.(timedAction)) }
    91  
    92  func (tc *timedActions) Pop() any {
    93  	old := *tc
    94  	n := len(old)
    95  	item := old[n-1]
    96  	*tc = old[0 : n-1]
    97  	return item
    98  }
    99  
   100  /////////////////
   101  // housekeeper //
   102  /////////////////
   103  
   104  func WaitStarted() {
   105  	for !DefaultHK.running.Load() {
   106  		time.Sleep(time.Second)
   107  	}
   108  }
   109  
   110  func IsReg(name string) bool { return DefaultHK.byName(name) != -1 } // see "duplicated" below
   111  
   112  func Reg(name string, f hkcb, interval time.Duration) {
   113  	debug.Assert(nlog.Stopping() || DefaultHK.running.Load())
   114  	DefaultHK.workCh <- request{
   115  		registering:     true,
   116  		name:            name,
   117  		f:               f,
   118  		initialInterval: interval,
   119  	}
   120  }
   121  
   122  func Unreg(name string) {
   123  	debug.Assert(nlog.Stopping() || DefaultHK.running.Load())
   124  	DefaultHK.workCh <- request{
   125  		registering: false,
   126  		name:        name,
   127  	}
   128  }
   129  
   130  func (*housekeeper) Name() string { return "housekeeper" }
   131  
   132  func (hk *housekeeper) terminate() {
   133  	hk.timer.Stop()
   134  	hk.running.Store(false)
   135  }
   136  
   137  func (hk *housekeeper) Run() (err error) {
   138  	signal.Notify(hk.sigCh,
   139  		syscall.SIGHUP,  // kill -SIGHUP
   140  		syscall.SIGINT,  // kill -SIGINT (Ctrl-C)
   141  		syscall.SIGTERM, // kill -SIGTERM
   142  		syscall.SIGQUIT, // kill -SIGQUIT
   143  	)
   144  	hk.timer = time.NewTimer(time.Hour)
   145  	hk.running.Store(true)
   146  	err = hk._run()
   147  	hk.terminate()
   148  	return
   149  }
   150  
   151  func (hk *housekeeper) _run() error {
   152  	for {
   153  		select {
   154  		case <-hk.stopCh.Listen():
   155  			return nil
   156  		case <-hk.timer.C:
   157  			if hk.actions.Len() == 0 {
   158  				break
   159  			}
   160  			// run the callback and update heap
   161  			var (
   162  				item     = hk.actions.Peek()
   163  				started  = mono.NanoTime()
   164  				interval = item.f()
   165  			)
   166  			if interval == UnregInterval {
   167  				heap.Remove(hk.actions, 0)
   168  			} else {
   169  				now := mono.NanoTime()
   170  				item.updateTime = now + interval.Nanoseconds()
   171  				heap.Fix(hk.actions, 0)
   172  				// system under extreme pressure or
   173  				// an illegal lock/sleep type contention inside the callback
   174  				if d := time.Duration(now - started); d > time.Second {
   175  					nlog.Warningln("hk call(", item.name, ") duration exceeds 1s:", d.String())
   176  				}
   177  			}
   178  			hk.updateTimer()
   179  		case req := <-hk.workCh:
   180  			if req.registering {
   181  				// duplicate name
   182  				if hk.byName(req.name) != -1 {
   183  					nlog.Errorf("hk: duplicated name %q - not registering", req.name)
   184  					break
   185  				}
   186  				initialInterval := req.initialInterval
   187  				if req.initialInterval == 0 {
   188  					initialInterval = req.f()
   189  				}
   190  				nt := mono.NanoTime() + initialInterval.Nanoseconds() // next time
   191  				heap.Push(hk.actions, timedAction{name: req.name, f: req.f, updateTime: nt})
   192  			} else {
   193  				idx := hk.byName(req.name)
   194  				if idx >= 0 {
   195  					heap.Remove(hk.actions, idx)
   196  				} else {
   197  					debug.Assert(false, req.name)
   198  					nlog.Warningln(req.name, "already removed")
   199  				}
   200  			}
   201  			hk.updateTimer()
   202  		case s, ok := <-hk.sigCh:
   203  			if ok {
   204  				signal.Stop(hk.sigCh)
   205  				err := cos.NewSignalError(s.(syscall.Signal))
   206  				hk.Stop(err)
   207  				return err
   208  			}
   209  		}
   210  	}
   211  }
   212  
   213  func (hk *housekeeper) updateTimer() {
   214  	if hk.actions.Len() == 0 {
   215  		hk.timer.Stop()
   216  		return
   217  	}
   218  	d := hk.actions.Peek().updateTime - mono.NanoTime()
   219  	hk.timer.Reset(time.Duration(d))
   220  }
   221  
   222  func (hk *housekeeper) byName(name string) int {
   223  	for i, tc := range *hk.actions {
   224  		if tc.name == name {
   225  			return i
   226  		}
   227  	}
   228  	return -1
   229  }
   230  
   231  func (*housekeeper) Stop(_ error) { DefaultHK.stopCh.Close() }