github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/hk/housekeeper.go (about) 1 // Package hk provides mechanism for registering cleanup 2 // functions which are invoked at specified intervals. 3 /* 4 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package hk 7 8 import ( 9 "container/heap" 10 "os" 11 "os/signal" 12 "syscall" 13 "time" 14 15 "github.com/NVIDIA/aistore/cmn/atomic" 16 "github.com/NVIDIA/aistore/cmn/cos" 17 "github.com/NVIDIA/aistore/cmn/debug" 18 "github.com/NVIDIA/aistore/cmn/mono" 19 "github.com/NVIDIA/aistore/cmn/nlog" 20 ) 21 22 const NameSuffix = ".gc" // reg name suffix 23 24 const ( 25 DayInterval = 24 * time.Hour 26 UnregInterval = 365 * DayInterval // to unregister upon return from the callback 27 ) 28 29 type ( 30 hkcb func() time.Duration 31 request struct { 32 f hkcb 33 name string 34 initialInterval time.Duration 35 registering bool 36 } 37 timedAction struct { 38 f hkcb 39 name string 40 updateTime int64 41 } 42 timedActions []timedAction 43 44 housekeeper struct { 45 stopCh cos.StopCh 46 sigCh chan os.Signal 47 actions *timedActions 48 timer *time.Timer 49 workCh chan request 50 running atomic.Bool 51 } 52 ) 53 54 var DefaultHK *housekeeper 55 56 // interface guard 57 var _ cos.Runner = (*housekeeper)(nil) 58 59 func TestInit() { 60 _init(false) 61 } 62 63 func Init() { 64 _init(true) 65 } 66 67 func _init(mustRun bool) { 68 DefaultHK = &housekeeper{ 69 workCh: make(chan request, 512), 70 sigCh: make(chan os.Signal, 1), 71 actions: &timedActions{}, 72 } 73 DefaultHK.stopCh.Init() 74 if mustRun { 75 DefaultHK.running.Store(false) 76 } else { 77 DefaultHK.running.Store(true) // tests only 78 } 79 heap.Init(DefaultHK.actions) 80 } 81 82 ////////////////// 83 // timedActions // 84 ////////////////// 85 86 func (tc timedActions) Len() int { return len(tc) } 87 func (tc timedActions) Less(i, j int) bool { return tc[i].updateTime < tc[j].updateTime } 88 func (tc timedActions) Swap(i, j int) { tc[i], tc[j] = tc[j], tc[i] } 89 func (tc timedActions) Peek() *timedAction { return &tc[0] } 90 func (tc *timedActions) Push(x any) { *tc = append(*tc, x.(timedAction)) } 91 92 func (tc *timedActions) Pop() any { 93 old := *tc 94 n := len(old) 95 item := old[n-1] 96 *tc = old[0 : n-1] 97 return item 98 } 99 100 ///////////////// 101 // housekeeper // 102 ///////////////// 103 104 func WaitStarted() { 105 for !DefaultHK.running.Load() { 106 time.Sleep(time.Second) 107 } 108 } 109 110 func IsReg(name string) bool { return DefaultHK.byName(name) != -1 } // see "duplicated" below 111 112 func Reg(name string, f hkcb, interval time.Duration) { 113 debug.Assert(nlog.Stopping() || DefaultHK.running.Load()) 114 DefaultHK.workCh <- request{ 115 registering: true, 116 name: name, 117 f: f, 118 initialInterval: interval, 119 } 120 } 121 122 func Unreg(name string) { 123 debug.Assert(nlog.Stopping() || DefaultHK.running.Load()) 124 DefaultHK.workCh <- request{ 125 registering: false, 126 name: name, 127 } 128 } 129 130 func (*housekeeper) Name() string { return "housekeeper" } 131 132 func (hk *housekeeper) terminate() { 133 hk.timer.Stop() 134 hk.running.Store(false) 135 } 136 137 func (hk *housekeeper) Run() (err error) { 138 signal.Notify(hk.sigCh, 139 syscall.SIGHUP, // kill -SIGHUP 140 syscall.SIGINT, // kill -SIGINT (Ctrl-C) 141 syscall.SIGTERM, // kill -SIGTERM 142 syscall.SIGQUIT, // kill -SIGQUIT 143 ) 144 hk.timer = time.NewTimer(time.Hour) 145 hk.running.Store(true) 146 err = hk._run() 147 hk.terminate() 148 return 149 } 150 151 func (hk *housekeeper) _run() error { 152 for { 153 select { 154 case <-hk.stopCh.Listen(): 155 return nil 156 case <-hk.timer.C: 157 if hk.actions.Len() == 0 { 158 break 159 } 160 // run the callback and update heap 161 var ( 162 item = hk.actions.Peek() 163 started = mono.NanoTime() 164 interval = item.f() 165 ) 166 if interval == UnregInterval { 167 heap.Remove(hk.actions, 0) 168 } else { 169 now := mono.NanoTime() 170 item.updateTime = now + interval.Nanoseconds() 171 heap.Fix(hk.actions, 0) 172 // system under extreme pressure or 173 // an illegal lock/sleep type contention inside the callback 174 if d := time.Duration(now - started); d > time.Second { 175 nlog.Warningln("hk call(", item.name, ") duration exceeds 1s:", d.String()) 176 } 177 } 178 hk.updateTimer() 179 case req := <-hk.workCh: 180 if req.registering { 181 // duplicate name 182 if hk.byName(req.name) != -1 { 183 nlog.Errorf("hk: duplicated name %q - not registering", req.name) 184 break 185 } 186 initialInterval := req.initialInterval 187 if req.initialInterval == 0 { 188 initialInterval = req.f() 189 } 190 nt := mono.NanoTime() + initialInterval.Nanoseconds() // next time 191 heap.Push(hk.actions, timedAction{name: req.name, f: req.f, updateTime: nt}) 192 } else { 193 idx := hk.byName(req.name) 194 if idx >= 0 { 195 heap.Remove(hk.actions, idx) 196 } else { 197 debug.Assert(false, req.name) 198 nlog.Warningln(req.name, "already removed") 199 } 200 } 201 hk.updateTimer() 202 case s, ok := <-hk.sigCh: 203 if ok { 204 signal.Stop(hk.sigCh) 205 err := cos.NewSignalError(s.(syscall.Signal)) 206 hk.Stop(err) 207 return err 208 } 209 } 210 } 211 } 212 213 func (hk *housekeeper) updateTimer() { 214 if hk.actions.Len() == 0 { 215 hk.timer.Stop() 216 return 217 } 218 d := hk.actions.Peek().updateTime - mono.NanoTime() 219 hk.timer.Reset(time.Duration(d)) 220 } 221 222 func (hk *housekeeper) byName(name string) int { 223 for i, tc := range *hk.actions { 224 if tc.name == name { 225 return i 226 } 227 } 228 return -1 229 } 230 231 func (*housekeeper) Stop(_ error) { DefaultHK.stopCh.Close() }