github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/k8s.go

// Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.

package runner

// This file contains functions related to Kubernetes (k8s) support for the runner.
// The runner can use k8s to watch and load ConfigMap information that it can use
// to manage its life cycle and in the future to load configuration information.
//
// The choice to make use of the package from Eric Chiang is driven by the
// package dependency issues with using the official go client. It rivals
// the spaghetti dependencies of Docker's BuildKit, borderline horrific. The chosen
// package has a single dependency and trades off using generated protobuf structures
// and so is wired to the k8s versions via that method, a tradeoff I'm willing to
// make based on my attempts with BuildKit.

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/davecgh/go-spew/spew"
	"github.com/karlmutch/k8s"
	core "github.com/karlmutch/k8s/apis/core/v1"

	"github.com/go-stack/stack"
	"github.com/lthibault/jitterbug"

	"github.com/jjeffery/kv" // MIT License

	"github.com/leaf-ai/studio-go-runner/internal/types"
)

var (
	k8sClient  *k8s.Client
	k8sInitErr kv.Error

	// Attempt the k8s client initialization once at package load time
	_ = attemptK8sStart()

	protect sync.Mutex
)

func attemptK8sStart() (err kv.Error) {
	protect.Lock()
	defer protect.Unlock()

	if client, errGo := k8s.NewInClusterClient(); errGo != nil {
		k8sInitErr = kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
	} else {
		k8sClient = client
	}

	return k8sInitErr
}

func watchCMaps(ctx context.Context, namespace string) (cmChange chan *core.ConfigMap, err kv.Error) {

	configMap := core.ConfigMap{}
	watcher, errGo := k8sClient.Watch(ctx, namespace, &configMap)
	if errGo != nil {
		return nil, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
	}

	cmChange = make(chan *core.ConfigMap, 1)

	go func() {

		defer func() {
			if watcher != nil {
				watcher.Close() // Always close the returned watcher.
			}
		}()

		for {
			if watcher == nil {
				// The watcher encountered an error previously, create a new watcher
				if watcher, errGo = k8sClient.Watch(ctx, namespace, &configMap); errGo != nil {
					// Back off before retrying so that a failing API server is not hammered
					select {
					case <-ctx.Done():
						return
					case <-time.After(time.Second):
					}
					continue
				}
			}
			cm := &core.ConfigMap{}
			// Next does not support cancellation and is blocking so we have to
			// abandon this thread and simply let it run unmanaged
			if _, err := watcher.Next(cm); err != nil {
				select {
				case <-ctx.Done():
					return
				default:
				}
				watcher.Close()
				watcher = nil
				continue
			}
			select {
			case cmChange <- cm:
			case <-time.After(time.Second):
				spew.Dump(*cm)
			}
		}
	}()
	return cmChange, nil
}

// MonitorK8s is used to send appropriate errors into an error reporting channel
// on a regular basis if the k8s connectivity state changes
//
func MonitorK8s(ctx context.Context, errC chan<- kv.Error) {

	t := jitterbug.New(time.Second*30, &jitterbug.Norm{Stdev: time.Second * 3})
	defer t.Stop()

	k8sLastErr := kv.NewError("")
	for {
		select {
		case <-ctx.Done():
			return

		case <-t.C:
			func() {
				attemptRestart := false
				protect.Lock()

				if k8sClient == nil {
					if k8sInitErr != nil && k8sInitErr != k8sLastErr {
						// Attempt to send the error back to any listener and timeout after a second.
						// The jitter based timer will provide another opportunity in 30 seconds
						// or thereabouts should this time out
						select {
						case errC <- k8sInitErr:
							k8sLastErr = k8sInitErr
						case <-time.After(time.Second):
						}
					}
					attemptRestart = true
				}
				protect.Unlock()

				if attemptRestart {
					// Doing this re-applies the lock so we do it outside of the earlier block
					attemptK8sStart()
				}
			}()
		}
	}
}
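// The function below is an illustrative sketch rather than part of the
// original runner source, showing how a caller might wire MonitorK8s into an
// error reporting loop. The exampleMonitorK8sUsage name, the channel buffer
// size, and the use of fmt for logging are assumptions made for demonstration.
func exampleMonitorK8sUsage(ctx context.Context) {
	// A small buffer lets MonitorK8s hand off an error without blocking
	errC := make(chan kv.Error, 1)
	go MonitorK8s(ctx, errC)

	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errC:
			// A production caller would forward this to its own logger
			fmt.Println("k8s connectivity error:", err.Error())
		}
	}
}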
// IsAliveK8s is used to extract any kv.Error reflecting the state of the k8s
// client api connection.
//
// A nil return indicates k8s is working and in use, otherwise a descriptive error
// is returned.
//
func IsAliveK8s() (err kv.Error) {
	protect.Lock()
	defer protect.Unlock()

	if k8sClient == nil {
		k8sInitErr = kv.NewError("Kubernetes uninitialized or no cluster present").With("stack", stack.Trace().TrimRuntime())
	}
	return k8sInitErr
}

// ConfigK8s is used to pull the values from a named config map in k8s
//
// This function will return an empty map and an error value on failure.
//
func ConfigK8s(ctx context.Context, namespace string, name string) (values map[string]string, err kv.Error) {
	values = map[string]string{}

	if err = IsAliveK8s(); err != nil {
		return values, err
	}
	cfg := &core.ConfigMap{}

	if errGo := k8sClient.Get(ctx, namespace, name, cfg); errGo != nil {
		return values, kv.Wrap(errGo).With("namespace", namespace).With("name", name).With("stack", stack.Trace().TrimRuntime())
	}

	if name == *cfg.Metadata.Name {
		fmt.Println(spew.Sdump(cfg.Data), stack.Trace().TrimRuntime())
		return cfg.Data, nil
	}
	return values, kv.NewError("configMap not found").With("namespace", namespace).With("name", name).With("stack", stack.Trace().TrimRuntime())
}
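// An illustrative sketch, not present in the original source, of fetching
// runner configuration with ConfigK8s. The namespace and configMap name used
// here are hypothetical placeholders chosen only for the example.
func exampleConfigK8sUsage(ctx context.Context) {
	values, err := ConfigK8s(ctx, "default", "studioml-env")
	if err != nil {
		fmt.Println("configMap fetch failed:", err.Error())
		return
	}
	for k, v := range values {
		// Each key value pair in the configMap data is one configuration entry
		fmt.Println("config", k, "=", v)
	}
}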
// K8sStateUpdate encapsulates the known kubernetes state within which the runner finds itself.
//
type K8sStateUpdate struct {
	Name  string
	State types.K8sState
}

// ListenK8s will register a listener to watch for pod specific configMaps in k8s
// and will relay state changes to a channel. The global state map should exist
// at the bare minimum. A state change in either map supersedes any previous
// state.
//
// This is a blocking function that will return either upon an error in API calls
// to the cluster API or when the ctx is Done().
//
func ListenK8s(ctx context.Context, namespace string, globalMap string, podMap string, updateC chan<- K8sStateUpdate, errC chan<- kv.Error) (err kv.Error) {

	// If k8s is not being used ignore this feature
	if err = IsAliveK8s(); err != nil {
		return err
	}

	// Starts the application level state watching
	currentState := K8sStateUpdate{
		State: types.K8sUnknown,
	}

	// Start the k8s configMap watcher
	cmChanges, err := watchCMaps(ctx, namespace)
	if err != nil {
		// The implication of an error here is that we will never get updates from k8s
		return err
	}

	fmt.Println("k8s watcher starting in namespace", namespace)
	defer fmt.Println("k8s watcher stopping")

	// Once every 3 minutes or so we will force the state propagation
	// to ensure that modules started after this module has started see something
	refresh := jitterbug.New(time.Minute*3, &jitterbug.Norm{Stdev: time.Second * 15})
	defer refresh.Stop()

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-refresh.C:
			// Try resending an existing state to listeners to refresh things
			select {
			case updateC <- currentState:
			case <-time.After(2 * time.Second):
			}
		case cm := <-cmChanges:
			if cm == nil {
				fmt.Println("k8s watcher channel closed", namespace)
				return nil
			}
			if *cm.Metadata.Namespace == namespace && (*cm.Metadata.Name == globalMap || *cm.Metadata.Name == podMap) {
				if state := cm.Data["STATE"]; len(state) != 0 {
					newState, errGo := types.K8sStateString(state)
					if errGo != nil {
						msg := kv.Wrap(errGo).With("namespace", namespace).With("config", *cm.Metadata.Name).With("state", state).With("stack", stack.Trace().TrimRuntime())
						select {
						case errC <- msg:
						case <-time.After(2 * time.Second):
							fmt.Println(msg)
						}
						// The state string could not be parsed so there is nothing to propagate
						continue
					}
					if newState == currentState.State && *cm.Metadata.Name == currentState.Name {
						continue
					}
					update := K8sStateUpdate{
						Name:  *cm.Metadata.Name,
						State: newState,
					}
					// Try sending the new state to listeners within the server invoking this function
					select {
					case updateC <- update:
						currentState = update
					case <-time.After(2 * time.Second):
						// If the message could not be sent try to wake up the error logger
						msg := kv.NewError("could not update state").With("namespace", namespace).With("config", *cm.Metadata.Name).With("state", state).With("stack", stack.Trace().TrimRuntime())
						select {
						case errC <- msg:
						case <-time.After(2 * time.Second):
							fmt.Println(msg)
						}
						continue
					}
				}
			}
		}
	}
}
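// The sketch below is an assumption-laden example rather than original runner
// code, showing how a server loop might consume the updates and errors that
// ListenK8s relays. The configMap names passed in are hypothetical and the
// reaction to state changes is left as a simple print.
func exampleListenK8sUsage(ctx context.Context) {
	updateC := make(chan K8sStateUpdate, 1)
	errC := make(chan kv.Error, 1)

	// ListenK8s blocks until the context is cancelled so run it in its own goroutine
	go func() {
		if err := ListenK8s(ctx, "default", "studioml-global-config", "studioml-pod-config", updateC, errC); err != nil {
			fmt.Println("k8s listener stopped:", err.Error())
		}
	}()

	for {
		select {
		case <-ctx.Done():
			return
		case update := <-updateC:
			// React to cluster signalled state changes, for example pausing work
			fmt.Println("k8s state is now", update.State, "from", update.Name)
		case err := <-errC:
			fmt.Println("k8s listener error:", err.Error())
		}
	}
}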