github.com/kubewharf/katalyst-core@v0.5.3/pkg/util/asyncworker/async_workers.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package asyncworker 18 19 import ( 20 "context" 21 "fmt" 22 "reflect" 23 "strings" 24 "time" 25 26 "k8s.io/apimachinery/pkg/util/wait" 27 28 "github.com/kubewharf/katalyst-core/pkg/metrics" 29 "github.com/kubewharf/katalyst-core/pkg/util/general" 30 ) 31 32 func NewAsyncWorkers(name string, emitter metrics.MetricEmitter) *AsyncWorkers { 33 return &AsyncWorkers{ 34 name: name, 35 emitter: emitter, 36 lastUndeliveredWork: make(map[string]*Work), 37 workStatuses: make(map[string]*workStatus), 38 } 39 } 40 41 func (aws *AsyncWorkers) AddWork(workName string, work *Work, policy DuplicateWorkPolicy) error { 42 aws.workLock.Lock() 43 defer aws.workLock.Unlock() 44 45 err := validateWork(work) 46 if err != nil { 47 return fmt.Errorf("validateWork for: %s failed with error: %v", workName, err) 48 } 49 50 general.InfoS("add work", 51 "AsyncWorkers", aws.name, 52 "workName", workName, 53 "params", work.Params, 54 "deliveredAt", work.DeliveredAt) 55 56 status, ok := aws.workStatuses[workName] 57 if !ok || status == nil { 58 general.InfoS("create status for work", 59 "AsyncWorkers", aws.name, "workName", workName) 60 status = &workStatus{} 61 aws.workStatuses[workName] = status 62 } else if status.IsWorking() && policy == DuplicateWorkPolicyDiscard { 63 general.InfoS("work %v already exists, discard new work", workName) 64 return nil 65 } 66 67 // dispatch a request to the pod work if none are running 68 if !status.IsWorking() { 69 general.InfoS("status isn't working, handle work immediately", 70 "AsyncWorkers", aws.name, 71 "workName", workName, 72 "params", work.Params, 73 "deliveredAt", work.DeliveredAt) 74 75 ctx := aws.contextForWork(workName, work) 76 go aws.handleWork(ctx, workName, work) 77 78 return nil 79 } 80 81 general.InfoS("status is working, queue work", 82 "AsyncWorkers", aws.name, 83 "workName", workName, 84 "params", work.Params, 85 "deliveredAt", work.DeliveredAt) 86 87 if undelivered, ok := aws.lastUndeliveredWork[workName]; ok { 88 general.InfoS("overwrite undelivered work", 89 "AsyncWorkers", aws.name, 90 "workName", workName, 91 "old params", undelivered.Params, 92 "old deliveredAt", undelivered.DeliveredAt, 93 "new params", work.Params, 94 "new deliveredAt", work.DeliveredAt) 95 } 96 97 // always set the most recent work 98 aws.lastUndeliveredWork[workName] = work 99 100 if status.cancelFn == nil { 101 general.Fatalf("[AsyncWorkers: %s] %s nil cancelFn in working status", aws.name, workName) 102 } else if status.work == nil { 103 general.Fatalf("[AsyncWorkers: %s] %s nil work in working status", aws.name, workName) 104 } 105 106 general.InfoS("canceling current working work", 107 "AsyncWorkers", aws.name, 108 "workName", workName, 109 "params", status.work.Params, 110 "deliveredAt", status.work.DeliveredAt) 111 status.cancelFn() 112 113 return nil 114 } 115 116 func (aws *AsyncWorkers) handleWork(ctx context.Context, workName string, work *Work) { 117 var handleErr error 118 119 defer func() { 120 if r := recover(); r != nil { 121 handleErr = fmt.Errorf("recover from %v", r) 122 123 metricErr := EmitCustomizedAsyncedMetrics(ctx, 124 metricNameAsyncWorkPanic, 1, 125 metrics.ConvertMapToTags(map[string]string{ 126 "workName": workName, 127 })...) 128 129 if metricErr != nil { 130 general.Errorf("emit metric(%s:%d) failed with err: %v", 131 metricNameAsyncWorkDurationMs, 1, metricErr) 132 } 133 } 134 135 aws.completeWork(workName, work, handleErr) 136 }() 137 138 general.InfoS("handle work", 139 "AsyncWorkers", aws.name, 140 "workName", workName, 141 "params", work.Params, 142 "deliveredAt", work.DeliveredAt) 143 144 funcValue := reflect.ValueOf(work.Fn) 145 146 // filling up parameters for the passed functions 147 paramValues := make([]reflect.Value, 1, len(work.Params)+1) 148 paramValues[0] = reflect.ValueOf(ctx) 149 for _, param := range work.Params { 150 paramValues = append(paramValues, reflect.ValueOf(param)) 151 } 152 153 startTime := time.Now() 154 funcRets := funcValue.Call(paramValues) 155 workDurationMs := time.Since(startTime).Milliseconds() 156 157 if len(funcRets) != 1 { 158 handleErr = fmt.Errorf("work Fn returns invalid number: %d of return values", len(funcRets)) 159 } else if funcRets[0].Interface() != nil { 160 var ok bool 161 handleErr, ok = funcRets[0].Interface().(error) 162 163 if !ok { 164 handleErr = fmt.Errorf("work Fn returns return value: %v of invalid type", funcRets[0].Interface()) 165 } 166 } 167 168 metricErr := EmitCustomizedAsyncedMetrics(ctx, 169 metricNameAsyncWorkDurationMs, workDurationMs, 170 metrics.ConvertMapToTags(map[string]string{ 171 "workName": workName, 172 })...) 173 174 if metricErr != nil { 175 general.Errorf("emit metric(%s:%d) failed with err: %v", 176 metricNameAsyncWorkDurationMs, workDurationMs, metricErr) 177 } 178 } 179 180 func (aws *AsyncWorkers) completeWork(workName string, completedWork *Work, workErr error) { 181 // TODO: support retrying if workErr != nil 182 general.InfoS("complete work", 183 "AsyncWorkers", aws.name, 184 "workName", workName, 185 "params", completedWork.Params, 186 "deliveredAt", completedWork.DeliveredAt, 187 "workErr", workErr) 188 189 aws.workLock.Lock() 190 defer aws.workLock.Unlock() 191 192 if work, exists := aws.lastUndeliveredWork[workName]; exists { 193 194 ctx := aws.contextForWork(workName, work) 195 196 go aws.handleWork(ctx, workName, work) 197 delete(aws.lastUndeliveredWork, workName) 198 } else { 199 aws.resetWorkStatus(workName) 200 } 201 } 202 203 // contextForWork returns or initializes the appropriate context for a known 204 // work. And point status.work to the work. If the current context is expired, it is reset. 205 // It should be called in function protected by aws.workLock. 206 func (aws *AsyncWorkers) contextForWork(workName string, work *Work) context.Context { 207 if work == nil { 208 general.Fatalf("[AsyncWorkers: %s] contextForWork: %s got nil work", aws.name, workName) 209 } 210 211 status, ok := aws.workStatuses[workName] 212 if !ok || status == nil { 213 general.Fatalf("[AsyncWorkers: %s] contextForWork: %s got no status", aws.name, workName) 214 } 215 if status.ctx == nil || status.ctx.Err() == context.Canceled { 216 ctx := context.Background() 217 if names := strings.Split(workName, WorkNameSeperator); len(names) > 0 { 218 ctx = context.WithValue(ctx, contextKeyMetricName, names[len(names)-1]) 219 ctx = context.WithValue(ctx, contextKeyMetricEmitter, aws.emitter) 220 } 221 status.ctx, status.cancelFn = context.WithCancel(ctx) 222 223 } 224 status.working = true 225 status.work = work 226 status.startedAt = time.Now() 227 return status.ctx 228 } 229 230 // resetWorkStatus resets work status corresponding to workName, 231 // when there is no work of workName to do. 232 // It should be called in function protected by aws.workLock. 233 func (aws *AsyncWorkers) resetWorkStatus(workName string) { 234 status, ok := aws.workStatuses[workName] 235 if !ok || status == nil { 236 general.Fatalf("[AsyncWorkers: %s] contextForWork: %s got no status", 237 aws.name, workName) 238 } 239 240 status.working = false 241 status.work = nil 242 status.startedAt = time.Time{} 243 } 244 245 func (aws *AsyncWorkers) Start(stopCh <-chan struct{}) error { 246 go wait.Until(aws.cleanupWorkStatus, 10*time.Second, stopCh) 247 return nil 248 } 249 250 // cleanupWorkStatus cleans up work status not in working 251 func (aws *AsyncWorkers) cleanupWorkStatus() { 252 aws.workLock.Lock() 253 defer aws.workLock.Unlock() 254 255 for workName, status := range aws.workStatuses { 256 if status == nil { 257 general.Errorf("[AsyncWorkers: %s] nil status for %s, clean it", aws.name, workName) 258 delete(aws.workStatuses, workName) 259 } else if !status.working { 260 general.Errorf("[AsyncWorkers: %s] status for %s not in working, clean it", aws.name, workName) 261 delete(aws.workStatuses, workName) 262 } 263 } 264 } 265 266 func (aws *AsyncWorkers) WorkExists(workName string) bool { 267 aws.workLock.Lock() 268 defer aws.workLock.Unlock() 269 270 status, hasRunningWork := aws.workStatuses[workName] 271 if hasRunningWork && status.IsWorking() { 272 return true 273 } 274 275 _, hasUndeliveredWork := aws.lastUndeliveredWork[workName] 276 if hasUndeliveredWork { 277 return true 278 } 279 280 return false 281 }