github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/k8s.go (about)

     1  // Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.
     2  
     3  package runner
     4  
     5  // This file contains functions related to Kubernetes (k8s) support for the runner.
     6  // The runner can use k8s to watch and load ConfigMap information that it can use
     7  // to manage its life cycle and in the future to load configuration information.
     8  //
     9  // The choice to make use of the package from Eric Chiang is driven by the
    10  // package dependency issues with using the official go client.  It rivals
    11  // the spaghetti dependencies of Docker's BuildKit, borderline horrific.  The chosen
    12  // package has a single dependency and trades off using generated protobuf structures
    13  // and so is wired to the k8s versions via that method, a tradeoff I'm willing to
    14  // make based on my attempts with BuildKit.
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/davecgh/go-spew/spew"
    23  	"github.com/karlmutch/k8s"
    24  	core "github.com/karlmutch/k8s/apis/core/v1"
    25  
    26  	"github.com/go-stack/stack"
    27  	"github.com/lthibault/jitterbug"
    28  
    29  	"github.com/jjeffery/kv" // MIT License
    30  
    31  	"github.com/leaf-ai/studio-go-runner/internal/types"
    32  )
    33  
    34  var (
    35  	k8sClient  *k8s.Client
    36  	k8sInitErr kv.Error
    37  
    38  	_ = attemptK8sStart()
    39  
    40  	protect sync.Mutex
    41  )
    42  
    43  func attemptK8sStart() (err kv.Error) {
    44  	protect.Lock()
    45  	defer protect.Unlock()
    46  
    47  	if client, errGo := k8s.NewInClusterClient(); errGo != nil {
    48  		k8sInitErr = kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
    49  	} else {
    50  		k8sClient = client
    51  	}
    52  
    53  	return k8sInitErr
    54  }
    55  
    56  func watchCMaps(ctx context.Context, namespace string) (cmChange chan *core.ConfigMap, err kv.Error) {
    57  
    58  	configMap := core.ConfigMap{}
    59  	watcher, errGo := k8sClient.Watch(ctx, namespace, &configMap)
    60  	if errGo != nil {
    61  		return nil, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
    62  	}
    63  
    64  	cmChange = make(chan *core.ConfigMap, 1)
    65  	go func() {
    66  
    67  		defer func() {
    68  			if watcher != nil {
    69  				watcher.Close() // Always close the returned watcher.
    70  			}
    71  		}()
    72  
    73  		for {
    74  			cm := &core.ConfigMap{}
    75  			// Next does not support cancellation and is blocking so we have to
    76  			// abandon this thread and simply let it run unmanaged
    77  			_, err := watcher.Next(cm)
    78  			if err != nil {
    79  				select {
    80  				case <-ctx.Done():
    81  					return
    82  				default:
    83  				}
    84  				if watcher != nil {
    85  					watcher.Close()
    86  					watcher = nil
    87  				}
    88  				// watcher encountered and error, create a new watcher
    89  				watcher, _ = k8sClient.Watch(ctx, namespace, &configMap)
    90  				continue
    91  			}
    92  			select {
    93  			case cmChange <- cm:
    94  			case <-time.After(time.Second):
    95  				spew.Dump(*cm)
    96  			}
    97  		}
    98  	}()
    99  	return cmChange, nil
   100  }
   101  
   102  // MonitorK8s is used to send appropriate errors into an error reporting channel
   103  // on a regular basis if the k8s connectivity state changes
   104  //
   105  func MonitorK8s(ctx context.Context, errC chan<- kv.Error) {
   106  
   107  	t := jitterbug.New(time.Second*30, &jitterbug.Norm{Stdev: time.Second * 3})
   108  	defer t.Stop()
   109  
   110  	k8sLastErr := kv.NewError("")
   111  	for {
   112  		select {
   113  		case <-ctx.Done():
   114  			return
   115  
   116  		case <-t.C:
   117  			func() {
   118  				attemptRestart := false
   119  				protect.Lock()
   120  
   121  				if k8sClient == nil {
   122  					if k8sInitErr != nil && k8sInitErr != k8sLastErr {
   123  						// Attempt to send the error back to any listener and timeout after a second.
   124  						// The jitter based timer will provide another opportunity in 30 or thereabouts
   125  						// seconds should this timeout
   126  						select {
   127  						case errC <- k8sInitErr:
   128  							k8sLastErr = k8sInitErr
   129  						case <-time.After(time.Second):
   130  						}
   131  					}
   132  					attemptRestart = true
   133  
   134  				}
   135  				protect.Unlock()
   136  
   137  				if attemptRestart {
   138  					// Doing this re-applies the lock so we do it outside of the earlier block
   139  					attemptK8sStart()
   140  				}
   141  			}()
   142  		}
   143  	}
   144  }
   145  
   146  // IsAliveK8s is used to extract any kv.in the state of the k8s client api connection.
   147  //
   148  // A nil returned indicates k8s is working and in use, otherwise a descriptive error
   149  // is returned.
   150  //
   151  func IsAliveK8s() (err kv.Error) {
   152  	protect.Lock()
   153  	defer protect.Unlock()
   154  
   155  	if k8sClient == nil {
   156  		k8sInitErr = kv.NewError("Kubernetes uninitialized or no cluster present").With("stack", stack.Trace().TrimRuntime())
   157  	}
   158  	return k8sInitErr
   159  }
   160  
   161  // ConfigK8s is used to pull the values from a named config map in k8s
   162  //
   163  // This function will return an empty map and and error value on failure.
   164  //
   165  func ConfigK8s(ctx context.Context, namespace string, name string) (values map[string]string, err kv.Error) {
   166  	values = map[string]string{}
   167  
   168  	if err = IsAliveK8s(); err != nil {
   169  		return values, nil
   170  	}
   171  	cfg := &core.ConfigMap{}
   172  
   173  	if errGo := k8sClient.Get(ctx, namespace, name, cfg); errGo != nil {
   174  		return values, kv.Wrap(errGo).With("namespace", namespace).With("name", name).With("stack", stack.Trace().TrimRuntime())
   175  	}
   176  
   177  	if name == *cfg.Metadata.Name {
   178  		fmt.Println(spew.Sdump(cfg.Data), stack.Trace().TrimRuntime())
   179  		return cfg.Data, nil
   180  	}
   181  	return values, kv.NewError("configMap not found").With("namespace", namespace).With("name", name).With("stack", stack.Trace().TrimRuntime())
   182  }
   183  
// K8sStateUpdate encapsulates the known kubernetes state within which the runner finds itself.
//
type K8sStateUpdate struct {
	// Name is the name of the ConfigMap from which the state was taken, it is
	// empty until ListenK8s has accepted a state from a ConfigMap.
	Name  string
	// State is the value parsed from the STATE entry of the ConfigMap, ListenK8s
	// starts out with types.K8sUnknown.
	State types.K8sState
}
   190  
   191  // ListenK8s will register a listener to watch for pod specific configMaps in k8s
   192  // and will relay state changes to a channel,  the global state map should exist
   193  // at the bare minimum.  A state change in either map superseded any previous
   194  // state.
   195  //
   196  // This is a blocking function that will return either upon an error in API calls
   197  // to the cluster API or when the ctx is Done().
   198  //
   199  func ListenK8s(ctx context.Context, namespace string, globalMap string, podMap string, updateC chan<- K8sStateUpdate, errC chan<- kv.Error) (err kv.Error) {
   200  
   201  	// If k8s is not being used ignore this feature
   202  	if err = IsAliveK8s(); err != nil {
   203  		return err
   204  	}
   205  
   206  	// Starts the application level state watching
   207  	currentState := K8sStateUpdate{
   208  		State: types.K8sUnknown,
   209  	}
   210  
   211  	// Start the k8s configMap watcher
   212  	cmChanges, err := watchCMaps(ctx, namespace)
   213  	if err != nil {
   214  		// The implication of an error here is that we will never get updates from k8s
   215  		return err
   216  	}
   217  
   218  	fmt.Println("k8s watcher starting in namespace", namespace)
   219  	defer fmt.Println("k8s watcher stopping")
   220  
   221  	// Once every 3 minutes for so we will force the state propagation
   222  	// to ensure that modules started after this module has started see something
   223  	refresh := jitterbug.New(time.Minute*3, &jitterbug.Norm{Stdev: time.Second * 15})
   224  	defer refresh.Stop()
   225  
   226  	for {
   227  		select {
   228  		case <-ctx.Done():
   229  			return nil
   230  		case <-refresh.C:
   231  			// Try resending an existing state to listeners to refresh things
   232  			select {
   233  			case updateC <- currentState:
   234  			case <-time.After(2 * time.Second):
   235  			}
   236  		case cm := <-cmChanges:
   237  			if cm == nil {
   238  				fmt.Println("k8s watcher channel closed", namespace)
   239  				return
   240  			}
   241  			if *cm.Metadata.Namespace == namespace && (*cm.Metadata.Name == globalMap || *cm.Metadata.Name == podMap) {
   242  				if state, _ := cm.Data["STATE"]; len(state) != 0 {
   243  					newState, errGo := types.K8sStateString(state)
   244  					if errGo != nil {
   245  						msg := kv.Wrap(errGo).With("namespace", namespace).With("config", *cm.Metadata.Name).With("state", state).With("stack", stack.Trace().TrimRuntime())
   246  						select {
   247  						case errC <- msg:
   248  						case <-time.After(2 * time.Second):
   249  							fmt.Println(err)
   250  						}
   251  					}
   252  					if newState == currentState.State && *cm.Metadata.Name == currentState.Name {
   253  						continue
   254  					}
   255  					update := K8sStateUpdate{
   256  						Name:  *cm.Metadata.Name,
   257  						State: newState,
   258  					}
   259  					// Try sending the new state to listeners within the server invoking this function
   260  					select {
   261  					case updateC <- update:
   262  						currentState = update
   263  					case <-time.After(2 * time.Second):
   264  						// If the message could not be sent try to wakeup the error logger
   265  						msg := kv.NewError("could not update state").With("namespace", namespace).With("config", *cm.Metadata.Name).With("state", state).With("stack", stack.Trace().TrimRuntime())
   266  						select {
   267  						case errC <- msg:
   268  						case <-time.After(2 * time.Second):
   269  							fmt.Println(msg)
   270  						}
   271  						continue
   272  					}
   273  				}
   274  			}
   275  		}
   276  	}
   277  }