github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/controller/state/castai_controller.go (about) 1 package state 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "sync/atomic" 8 "time" 9 10 castaipb "github.com/castai/kvisor/api/v1/runtime" 11 "github.com/castai/kvisor/cmd/controller/kube" 12 "github.com/castai/kvisor/pkg/castai" 13 "github.com/castai/kvisor/pkg/logging" 14 "golang.org/x/sync/errgroup" 15 ) 16 17 type CastaiConfig struct { 18 RemoteConfigSyncDuration time.Duration `validate:"required"` 19 } 20 21 func NewCastaiController(log *logging.Logger, cfg CastaiConfig, appProtoConfig *castaipb.ControllerConfig, kubeClient *kube.Client, castaiClient *castai.Client) *CastaiController { 22 if cfg.RemoteConfigSyncDuration == 0 { 23 cfg.RemoteConfigSyncDuration = 5 * time.Minute 24 } 25 return &CastaiController{ 26 id: "castai", 27 enabled: castaiClient != nil, 28 log: log.WithField("component", "castai_ctrl"), 29 kubeClient: kubeClient, 30 cfg: cfg, 31 appProtoConfig: appProtoConfig, 32 castaiClient: castaiClient, 33 remoteConfigFetchErrors: &atomic.Int64{}, 34 remoteConfigInitialSyncTimeout: 1 * time.Minute, 35 remoteConfigRetryWaitDuration: 20 * time.Second, 36 removeConfigMaxFailures: 10, 37 streamReconnectWaitDuration: 2 * time.Second, 38 } 39 } 40 41 type CastaiController struct { 42 id string 43 enabled bool 44 log *logging.Logger 45 kubeClient *kube.Client 46 cfg CastaiConfig 47 castaiClient *castai.Client 48 appProtoConfig *castaipb.ControllerConfig 49 50 remoteConfigFetchErrors *atomic.Int64 51 removeConfigMaxFailures int64 52 streamReconnectWaitDuration time.Duration 53 remoteConfigRetryWaitDuration time.Duration 54 remoteConfigInitialSyncTimeout time.Duration 55 } 56 57 func (c *CastaiController) Enabled() bool { 58 return c.enabled 59 } 60 61 func (c *CastaiController) Run(ctx context.Context) error { 62 c.log.Info("running") 63 defer c.log.Infof("stopping") 64 65 ctxCtx, cancel := context.WithTimeout(ctx, c.remoteConfigInitialSyncTimeout) 66 defer cancel() 67 68 if err := c.fetchInitialRemoteConfig(ctxCtx); err != nil { 69 return fmt.Errorf("fetching initial config: %w", err) 70 } 71 72 errg, ctx := errgroup.WithContext(ctx) 73 errg.Go(func() error { 74 return c.runRemoteConfigSyncLoop(ctx) 75 }) 76 77 return errg.Wait() 78 } 79 80 func (c *CastaiController) fetchConfig(ctx context.Context, req *castaipb.GetConfigurationRequest) (*castaipb.Configuration, error) { 81 resp, err := c.castaiClient.GRPC.GetConfiguration(ctx, req) 82 if err != nil { 83 return nil, err 84 } 85 if resp.Config == nil { 86 resp.Config = &castaipb.Configuration{} 87 } 88 return resp.Config, nil 89 } 90 91 func (c *CastaiController) fetchInitialRemoteConfig(ctx context.Context) error { 92 for { 93 select { 94 case <-ctx.Done(): 95 return ctx.Err() 96 default: 97 } 98 99 cfg, err := c.fetchConfig(ctx, &castaipb.GetConfigurationRequest{ 100 CurrentConfig: &castaipb.GetConfigurationRequest_Controller{ 101 Controller: c.appProtoConfig, 102 }, 103 }) 104 if err != nil { 105 c.log.Errorf("fetching initial config: %v", err) 106 sleep(ctx, c.remoteConfigRetryWaitDuration) 107 continue 108 } 109 c.updateRemoteConfig(cfg) 110 c.log.Info("initial config synced") 111 return nil 112 } 113 } 114 115 func (c *CastaiController) runRemoteConfigSyncLoop(ctx context.Context) error { 116 ticker := time.NewTicker(c.cfg.RemoteConfigSyncDuration) 117 defer ticker.Stop() 118 119 for { 120 select { 121 case <-ctx.Done(): 122 return ctx.Err() 123 case <-ticker.C: 124 cfg, err := c.fetchConfig(ctx, &castaipb.GetConfigurationRequest{}) 125 if err != nil { 126 if errors.Is(err, context.Canceled) { 127 return err 128 } 129 c.log.Errorf("fetching config: %v", err) 130 fetchErrors := c.remoteConfigFetchErrors.Add(1) 131 if fetchErrors >= c.removeConfigMaxFailures { 132 return fmt.Errorf("maximum %d remote config fetch errors reached", fetchErrors) 133 } 134 continue 135 } 136 c.remoteConfigFetchErrors.Store(0) 137 c.updateRemoteConfig(cfg) 138 } 139 } 140 } 141 142 func (c *CastaiController) updateRemoteConfig(cfg *castaipb.Configuration) { 143 }