github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/monitor/internal/k8s/event_retry_handler.go (about) 1 package k8smonitor 2 3 import ( 4 "context" 5 "time" 6 7 "go.aporeto.io/enforcerd/internal/extractors/containermetadata" 8 "go.uber.org/zap" 9 ) 10 11 var ( 12 retryWaittimeUnit = time.Second 13 retryTimeout = time.Second * 30 14 ) 15 16 type startEventRetryFunc func(containermetadata.CommonKubernetesContainerMetadata, uint) 17 18 func newStartEventRetryFunc(mainCtx context.Context, extractor containermetadata.CommonContainerMetadataExtractor, startEvent startEventFunc) startEventRetryFunc { 19 return func(kmd containermetadata.CommonKubernetesContainerMetadata, retry uint) { 20 // we only care about pod sandboxes for restarts 21 // make sure that we stick to that 22 if kmd.Kind() != containermetadata.PodSandbox { 23 zap.L().Debug( 24 "K8sMonitor: startEventRetry: this is not a pod sandbox. Aborting retry...", 25 zap.Uint("retry", retry), 26 zap.String("kind", kmd.Kind().String()), 27 zap.String("id", kmd.ID()), 28 ) 29 return 30 } 31 32 // wait before we retry 33 waitTime := calculateWaitTime(retry) 34 zap.L().Debug( 35 "K8sMonitor: startEventRetry: waiting before retry...", 36 zap.Uint("retry", retry), 37 zap.Duration("waitTime", waitTime), 38 zap.String("id", kmd.ID()), 39 ) 40 select { 41 case <-mainCtx.Done(): 42 // no point in continuing if the main context is done 43 return 44 case <-time.After(waitTime): 45 } 46 47 // check if the sandbox still exists, otherwise we can abort the retries 48 if !extractor.Has(containermetadata.NewRuncArguments(containermetadata.StartAction, kmd.ID())) { 49 zap.L().Debug( 50 "K8sMonitor: startEventRetry: container for start event does not exist any longer. Aborting...", 51 zap.Uint("retry", retry), 52 zap.String("id", kmd.ID()), 53 ) 54 return 55 } 56 57 // now create a new context and retry 58 // the recursion occurs within the startEvent 59 ctx, cancel := context.WithTimeout(mainCtx, retryTimeout) 60 defer cancel() 61 if err := startEvent(ctx, kmd, retry); err != nil { 62 zap.L().Error( 63 "K8sMonitor: startEventRetry: failed to process start event on retry", 64 zap.Uint("retry", retry), 65 zap.Error(err), 66 zap.String("id", kmd.ID()), 67 zap.String("podUID", kmd.PodUID()), 68 zap.String("podName", kmd.PodName()), 69 zap.String("podNamespace", kmd.PodNamespace()), 70 ) 71 } 72 } 73 } 74 75 // calculateWaitTime calculates a fibonacci style backoff wait time based on the number of retry 76 // It uses `retryWaittimeUnit` as the base unit for the wait time 77 func calculateWaitTime(retry uint) time.Duration { 78 var n uint 79 switch retry { 80 case 0: 81 n = 0 82 case 1: 83 n = 1 84 case 2: 85 n = 1 86 case 3: 87 n = 2 88 case 4: 89 n = 3 90 case 5: 91 n = 5 92 case 6: 93 n = 8 94 case 7: 95 n = 13 96 case 8: 97 n = 21 98 case 9: 99 n = 34 100 default: 101 n = 55 102 } 103 return retryWaittimeUnit * time.Duration(n) 104 }