go.ligato.io/vpp-agent/v3@v3.5.0/plugins/kvscheduler/plugin_scheduler.go (about)

     1  // Copyright (c) 2018 Cisco and/or its affiliates.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at:
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kvscheduler
    16  
    17  import (
    18  	"context"
    19  	"os"
    20  	"runtime/trace"
    21  	"sync"
    22  	"time"
    23  
    24  	"github.com/go-errors/errors"
    25  	"google.golang.org/protobuf/proto"
    26  
    27  	"go.ligato.io/cn-infra/v2/idxmap"
    28  	"go.ligato.io/cn-infra/v2/idxmap/mem"
    29  	"go.ligato.io/cn-infra/v2/infra"
    30  	"go.ligato.io/cn-infra/v2/rpc/rest"
    31  
    32  	kvs "go.ligato.io/vpp-agent/v3/plugins/kvscheduler/api"
    33  	"go.ligato.io/vpp-agent/v3/plugins/kvscheduler/internal/graph"
    34  	"go.ligato.io/vpp-agent/v3/plugins/kvscheduler/internal/registry"
    35  	"go.ligato.io/vpp-agent/v3/plugins/kvscheduler/internal/utils"
    36  	"go.ligato.io/vpp-agent/v3/proto/ligato/kvscheduler"
    37  )
    38  
const (
	// DependencyRelation identifies dependency relation for the graph.
	DependencyRelation = "depends-on"

	// DerivesRelation identifies relation of value derivation for the graph.
	DerivesRelation = "derives"

	// txnHistoryTrimmingPeriod determines how often the transaction history
	// gets trimmed to remove records too old to keep.
	txnHistoryTrimmingPeriod = 1 * time.Minute

	// defaultRecordTransactionHistory: by default, a history of processed
	// transactions is recorded.
	defaultRecordTransactionHistory = true

	// defaultTransactionHistoryAgeLimit: by default, only transactions processed
	// in the last 24 hours are kept recorded (with the exception of the
	// permanently recorded init period).
	defaultTransactionHistoryAgeLimit = 24 * 60 // in minutes

	// defaultPermanentlyRecordedInitPeriod: by default, transactions from
	// the first hour of runtime stay permanently recorded.
	defaultPermanentlyRecordedInitPeriod = 60 // in minutes

	// defaultEnableTxnSimulation: by default, all NB transactions and SB
	// notifications are run without simulation (retries are always simulated
	// first).
	defaultEnableTxnSimulation = false

	// defaultPrintTxnSummary: by default, a concise summary of every processed
	// transaction is printed to stdout.
	defaultPrintTxnSummary = true

	// verifyModeEnv is the name of the environment variable used to enable
	// verification after every transaction.
	verifyModeEnv = "KVSCHED_VERIFY_MODE"

	// checkNetNamespaceEnv is the name of the environment variable used to turn
	// on the automatic check for the preservation of the original network
	// namespace after descriptor operations.
	checkNetNamespaceEnv = "KVSCHED_CHECK_NET_NS"

	// logGraphWalkEnv is the name of the environment variable used to trigger
	// log messages showing graph traversal.
	logGraphWalkEnv = "KVSCHED_LOG_GRAPH_WALK"
)
    79  
// Scheduler is a CN-infra plugin implementing KVScheduler.
// Detailed documentation can be found in the "api" and "docs" sub-folders.
type Scheduler struct {
	Deps

	// configuration (defaults overridden by the loaded config file, see Init)
	config *Config

	// management of go routines: ctx/cancel are created in Init and cancelled
	// in Close; wg counts the go routines started by Init
	ctx    context.Context
	cancel context.CancelFunc
	wg     sync.WaitGroup

	// in-memory representation of all created+pending kv-pairs and their dependencies
	graph graph.Graph

	// registry for descriptors
	registry registry.Registry

	// a list of key prefixes covered by registered descriptors
	// (one entry per registered descriptor with a non-empty NBKeyPrefix)
	keyPrefixes []string

	// TXN processing
	txnLock      sync.Mutex // can be used to pause transaction processing; always lock before the graph!
	txnQueue     chan *transaction
	txnSeqNumber uint64
	resyncCount  uint

	// value status
	updatedStates    utils.KeySet // base values with updated status
	valStateWatchers []valStateWatcher

	// TXN history
	historyLock sync.Mutex
	txnHistory  []*kvs.RecordedTxn // ordered from the oldest to the latest
	startTime   time.Time

	// debugging (both flags are set in Init from environment variables)
	verifyMode   bool
	logGraphWalk bool
}
   121  
// Deps lists dependencies of the scheduler: the common plugin dependencies
// (logger, config, plugin name) and the HTTP handlers with which the REST API
// of the scheduler gets registered during Init.
type Deps struct {
	infra.PluginDeps
	HTTPHandlers rest.HTTPHandlers
}
   127  
// Config holds the KVScheduler configuration.
// Init pre-fills every field with its default* constant before the
// configuration file is loaded into the same structure.
type Config struct {
	RecordTransactionHistory      bool   `json:"record-transaction-history"`
	TransactionHistoryAgeLimit    uint32 `json:"transaction-history-age-limit"`    // in minutes
	PermanentlyRecordedInitPeriod uint32 `json:"permanently-recorded-init-period"` // in minutes
	EnableTxnSimulation           bool   `json:"enable-txn-simulation"`
	PrintTxnSummary               bool   `json:"print-txn-summary"`
}
   136  
// SchedulerTxn implements transaction for the KV scheduler.
// It collects the requested value changes (keyed by the value key; a nil value
// denotes deletion) until they are handed over to the scheduler by Commit.
type SchedulerTxn struct {
	scheduler *Scheduler
	values    map[string]proto.Message
}
   142  
// valStateWatcher represents one subscription for value state updates:
// the channel to which status updates are sent and the selector of keys
// the subscriber is interested in.
type valStateWatcher struct {
	channel  chan<- *kvscheduler.BaseValueStatus
	selector kvs.KeySelector
}
   148  
   149  // Init initializes the scheduler. Single go routine is started that will process
   150  // all the transactions synchronously.
   151  func (s *Scheduler) Init() error {
   152  	// default configuration
   153  	s.config = &Config{
   154  		RecordTransactionHistory:      defaultRecordTransactionHistory,
   155  		TransactionHistoryAgeLimit:    defaultTransactionHistoryAgeLimit,
   156  		PermanentlyRecordedInitPeriod: defaultPermanentlyRecordedInitPeriod,
   157  		EnableTxnSimulation:           defaultEnableTxnSimulation,
   158  		PrintTxnSummary:               defaultPrintTxnSummary,
   159  	}
   160  
   161  	// load configuration
   162  	err := s.loadConfig(s.config)
   163  	if err != nil {
   164  		s.Log.Error(err)
   165  		return err
   166  	}
   167  	s.Log.Debugf("KVScheduler configuration: %+v", *s.config)
   168  
   169  	// prepare context for all go routines
   170  	s.ctx, s.cancel = context.WithCancel(context.Background())
   171  	// initialize graph for in-memory storage of key-value pairs
   172  	graphOpts := graph.Opts{
   173  		RecordOldRevs:       s.config.RecordTransactionHistory,
   174  		RecordAgeLimit:      s.config.TransactionHistoryAgeLimit,
   175  		PermanentInitPeriod: s.config.PermanentlyRecordedInitPeriod,
   176  		MethodTracker:       trackGraphMethod,
   177  	}
   178  	s.graph = graph.NewGraph(graphOpts)
   179  	// initialize registry for key->descriptor lookups
   180  	s.registry = registry.NewRegistry()
   181  	// prepare channel for serializing transactions
   182  	s.txnQueue = make(chan *transaction, 100)
   183  	reportQueueCap(cap(s.txnQueue))
   184  	// register REST API handlers
   185  	s.registerHandlers(s.HTTPHandlers)
   186  	// initialize key-set used to mark values with updated status
   187  	s.updatedStates = utils.NewSliceBasedKeySet()
   188  	// record startup time
   189  	s.startTime = time.Now()
   190  
   191  	// enable or disable debugging mode
   192  	s.verifyMode = os.Getenv(verifyModeEnv) != ""
   193  	s.logGraphWalk = os.Getenv(logGraphWalkEnv) != ""
   194  
   195  	// go routine processing serialized transactions
   196  	s.wg.Add(1)
   197  	go s.consumeTransactions()
   198  
   199  	// go routine periodically removing transaction records too old to keep
   200  	if s.config.RecordTransactionHistory {
   201  		s.wg.Add(1)
   202  		go s.transactionHistoryTrimming()
   203  	}
   204  	return nil
   205  }
   206  
   207  // loadConfig loads configuration file.
   208  func (s *Scheduler) loadConfig(config *Config) error {
   209  	found, err := s.Cfg.LoadValue(config)
   210  	if err != nil {
   211  		return err
   212  	} else if !found {
   213  		s.Log.Debugf("%v config not found", s.PluginName)
   214  		return nil
   215  	}
   216  	s.Log.Debugf("%v config found: %+v", s.PluginName, config)
   217  	return err
   218  }
   219  
   220  // Close stops all the go routines.
   221  func (s *Scheduler) Close() error {
   222  	s.cancel()
   223  	s.wg.Wait()
   224  	return nil
   225  }
   226  
   227  // RegisterKVDescriptor registers descriptor(s) for a set of selected
   228  // keys. It should be called in the Init phase of agent plugins.
   229  // Every key-value pair must have at most one descriptor associated with it
   230  // (none for derived values expressing properties).
   231  func (s *Scheduler) RegisterKVDescriptor(descriptors ...*kvs.KVDescriptor) error {
   232  	for _, d := range descriptors {
   233  		err := s.registerKVDescriptor(d)
   234  		if err != nil {
   235  			return err
   236  		}
   237  	}
   238  	return nil
   239  }
   240  
   241  func (s *Scheduler) registerKVDescriptor(descriptor *kvs.KVDescriptor) error {
   242  	// TODO: validate descriptor
   243  	if s.registry.GetDescriptor(descriptor.Name) != nil {
   244  		return kvs.ErrDescriptorExists
   245  	}
   246  
   247  	stats.addDescriptor(descriptor.Name)
   248  
   249  	s.registry.RegisterDescriptor(descriptor)
   250  	if descriptor.NBKeyPrefix != "" {
   251  		s.keyPrefixes = append(s.keyPrefixes, descriptor.NBKeyPrefix)
   252  	}
   253  
   254  	if descriptor.WithMetadata {
   255  		var metadataMap idxmap.NamedMappingRW
   256  		if descriptor.MetadataMapFactory != nil {
   257  			metadataMap = descriptor.MetadataMapFactory()
   258  		} else {
   259  			metadataMap = mem.NewNamedMapping(s.Log, descriptor.Name, nil)
   260  		}
   261  		graphW := s.graph.Write(true, false)
   262  		graphW.RegisterMetadataMap(descriptor.Name, metadataMap)
   263  		graphW.Release()
   264  	}
   265  	return nil
   266  }
   267  
   268  // GetRegisteredNBKeyPrefixes returns a list of key prefixes from NB with values
   269  // described by registered descriptors and therefore managed by the scheduler.
   270  func (s *Scheduler) GetRegisteredNBKeyPrefixes() []string {
   271  	return s.keyPrefixes
   272  }
   273  
   274  // StartNBTransaction starts a new transaction from NB to SB plane.
   275  // The enqueued actions are scheduled for execution by Txn.Commit().
   276  func (s *Scheduler) StartNBTransaction() kvs.Txn {
   277  	txn := &SchedulerTxn{
   278  		scheduler: s,
   279  		values:    make(map[string]proto.Message),
   280  	}
   281  	return txn
   282  }
   283  
   284  // TransactionBarrier ensures that all notifications received prior to the call
   285  // are associated with transactions that have already finalized.
   286  func (s *Scheduler) TransactionBarrier() {
   287  	s.txnLock.Lock()
   288  	s.txnLock.Unlock()
   289  }
   290  
   291  // PushSBNotification notifies about a spontaneous value change(s) in the SB
   292  // plane (i.e. not triggered by NB transaction).
   293  func (s *Scheduler) PushSBNotification(notif ...kvs.KVWithMetadata) error {
   294  	txn := &transaction{
   295  		txnType: kvs.SBNotification,
   296  		created: time.Now(),
   297  	}
   298  	for _, value := range notif {
   299  		txn.values = append(txn.values, kvForTxn{
   300  			key:      value.Key,
   301  			value:    value.Value,
   302  			metadata: value.Metadata,
   303  			origin:   kvs.FromSB,
   304  		})
   305  	}
   306  	return s.enqueueTxn(txn)
   307  }
   308  
   309  // GetMetadataMap returns (read-only) map associating value label with value
   310  // metadata of a given descriptor.
   311  // Returns nil if the descriptor does not expose metadata.
   312  func (s *Scheduler) GetMetadataMap(descriptor string) idxmap.NamedMapping {
   313  	graphR := s.graph.Read()
   314  	defer graphR.Release()
   315  
   316  	return graphR.GetMetadataMap(descriptor)
   317  }
   318  
   319  // GetValueStatus returns the status of a non-derived value with the given
   320  // key.
   321  func (s *Scheduler) GetValueStatus(key string) *kvscheduler.BaseValueStatus {
   322  	graphR := s.graph.Read()
   323  	defer graphR.Release()
   324  	return getValueStatus(graphR.GetNode(key), key)
   325  }
   326  
   327  // WatchValueStatus allows to watch for changes in the status of non-derived
   328  // values with keys selected by the selector (all if keySelector==nil).
   329  func (s *Scheduler) WatchValueStatus(channel chan<- *kvscheduler.BaseValueStatus, keySelector kvs.KeySelector) {
   330  	s.txnLock.Lock()
   331  	defer s.txnLock.Unlock()
   332  	s.valStateWatchers = append(s.valStateWatchers, valStateWatcher{
   333  		channel:  channel,
   334  		selector: keySelector,
   335  	})
   336  }
   337  
   338  // DumpValuesByDescriptor dumps values associated with the given
   339  // descriptor as viewed from either NB (what was requested to be applied),
   340  // SB (what is actually applied) or from the inside (what kvscheduler's
   341  // cached view of SB is).
   342  func (s *Scheduler) DumpValuesByDescriptor(descriptor string, view kvs.View) (values []kvs.KVWithMetadata, err error) {
   343  	if view == kvs.SBView {
   344  		// pause transaction processing
   345  		s.txnLock.Lock()
   346  		defer s.txnLock.Unlock()
   347  	}
   348  
   349  	graphR := s.graph.Read()
   350  	defer graphR.Release()
   351  
   352  	if view == kvs.NBView {
   353  		// return the intended state
   354  		var kvPairs []kvs.KVWithMetadata
   355  		nbNodes := graphR.GetNodes(nil,
   356  			graph.WithFlags(&DescriptorFlag{descriptor}),
   357  			graph.WithoutFlags(&DerivedFlag{}, &ValueStateFlag{kvscheduler.ValueState_OBTAINED}))
   358  
   359  		for _, node := range nbNodes {
   360  			lastUpdate := getNodeLastUpdate(node)
   361  			if lastUpdate == nil || lastUpdate.value == nil {
   362  				// filter found NB values and values requested to be deleted
   363  				continue
   364  			}
   365  			kvPairs = append(kvPairs, kvs.KVWithMetadata{
   366  				Key:      node.GetKey(),
   367  				Value:    lastUpdate.value,
   368  				Origin:   kvs.FromNB,
   369  				Metadata: node.GetMetadata(),
   370  			})
   371  		}
   372  		return kvPairs, nil
   373  	}
   374  
   375  	/* Cached/SB: */
   376  
   377  	// retrieve from the in-memory graph first (for Retrieve it is used for correlation)
   378  	inMemNodes := nodesToKVPairsWithMetadata(
   379  		graphR.GetNodes(nil, descrValsSelectors(descriptor, true)...))
   380  
   381  	if view == kvs.CachedView {
   382  		// return the scheduler's view of SB for the given descriptor
   383  		return inMemNodes, nil
   384  	}
   385  
   386  	// obtain Retrieve handler from the descriptor
   387  	kvDescriptor := s.registry.GetDescriptor(descriptor)
   388  	if kvDescriptor == nil {
   389  		err = errors.New("descriptor is not registered")
   390  		return
   391  	}
   392  	if kvDescriptor.Retrieve == nil {
   393  		err = errors.New("descriptor does not support Retrieve operation")
   394  		return
   395  	}
   396  
   397  	// retrieve the state directly from SB via descriptor
   398  	values, err = kvDescriptor.Retrieve(inMemNodes)
   399  	return
   400  }
   401  
   402  func (s *Scheduler) getDescriptorForKeyPrefix(keyPrefix string) string {
   403  	var descriptorName string
   404  	s.txnLock.Lock()
   405  	for _, descriptor := range s.registry.GetAllDescriptors() {
   406  		if descriptor.NBKeyPrefix == keyPrefix {
   407  			descriptorName = descriptor.Name
   408  		}
   409  	}
   410  	s.txnLock.Unlock()
   411  	return descriptorName
   412  }
   413  
   414  // DumpValuesByKeyPrefix like DumpValuesByDescriptor returns a dump of values,
   415  // but the descriptor is selected based on the key prefix.
   416  func (s *Scheduler) DumpValuesByKeyPrefix(keyPrefix string, view kvs.View) (values []kvs.KVWithMetadata, err error) {
   417  	descriptorName := s.getDescriptorForKeyPrefix(keyPrefix)
   418  	if descriptorName == "" {
   419  		err = errors.New("no descriptor found matching the key prefix")
   420  		return
   421  	}
   422  	return s.DumpValuesByDescriptor(descriptorName, view)
   423  }
   424  
// SetValue changes (non-derived) value.
// If <value> is nil, the value will get deleted.
// The change is only recorded inside the transaction (a later SetValue for
// the same key overwrites the earlier one); nothing is executed until Commit.
// The transaction itself is returned to allow chaining of the calls.
func (txn *SchedulerTxn) SetValue(key string, value proto.Message) kvs.Txn {
	txn.values[key] = value
	return txn
}
   431  
   432  // Commit orders scheduler to execute enqueued operations.
   433  // Operations with unmet dependencies will get postponed and possibly
   434  // executed later.
   435  func (txn *SchedulerTxn) Commit(ctx context.Context) (txnSeqNum uint64, err error) {
   436  	ctx, task := trace.NewTask(ctx, "scheduler.Commit")
   437  	defer task.End()
   438  
   439  	txnSeqNum = ^uint64(0)
   440  
   441  	txnData := &transaction{
   442  		ctx:     ctx,
   443  		txnType: kvs.NBTransaction,
   444  		nb:      &nbTxn{},
   445  		values:  make([]kvForTxn, 0, len(txn.values)),
   446  		created: time.Now(),
   447  	}
   448  
   449  	// collect values
   450  	for key, value := range txn.values {
   451  		txnData.values = append(txnData.values, kvForTxn{
   452  			key:    key,
   453  			value:  value,
   454  			origin: kvs.FromNB,
   455  		})
   456  	}
   457  
   458  	// parse transaction options
   459  	txnData.nb.isBlocking = !kvs.IsNonBlockingTxn(ctx)
   460  	txnData.nb.resyncType, txnData.nb.verboseRefresh = kvs.IsResync(ctx)
   461  	txnData.nb.retryArgs, txnData.nb.retryEnabled = kvs.IsWithRetry(ctx)
   462  	txnData.nb.revertOnFailure = kvs.IsWithRevert(ctx)
   463  	txnData.nb.description, _ = kvs.IsWithDescription(ctx)
   464  	txnData.nb.withSimulation = txn.scheduler.config.EnableTxnSimulation || kvs.IsWithSimulation(ctx)
   465  
   466  	// validate transaction options
   467  	if txnData.nb.resyncType == kvs.DownstreamResync && len(txnData.values) > 0 {
   468  		return txnSeqNum, kvs.NewTransactionError(kvs.ErrCombinedDownstreamResyncWithChange, nil)
   469  	}
   470  	if txnData.nb.revertOnFailure && txnData.nb.resyncType != kvs.NotResync {
   471  		return txnSeqNum, kvs.NewTransactionError(kvs.ErrRevertNotSupportedWithResync, nil)
   472  	}
   473  
   474  	// enqueue txn and for blocking Commit wait for the errors
   475  	if txnData.nb.isBlocking {
   476  		txnData.nb.resultChan = make(chan txnResult, 1)
   477  	}
   478  
   479  	err = txn.scheduler.enqueueTxn(txnData)
   480  	if err != nil {
   481  		return txnSeqNum, kvs.NewTransactionError(err, nil)
   482  	}
   483  	if txnData.nb.isBlocking {
   484  		select {
   485  		case <-txn.scheduler.ctx.Done():
   486  			return txnSeqNum, kvs.NewTransactionError(kvs.ErrClosedScheduler, nil)
   487  		case <-ctx.Done():
   488  			return txnSeqNum, kvs.NewTransactionError(kvs.ErrTxnWaitCanceled, nil)
   489  		case txnResult := <-txnData.nb.resultChan:
   490  			close(txnData.nb.resultChan)
   491  			trace.Logf(ctx, "txnSeqNum", "%d", txnResult.txnSeqNum)
   492  			return txnResult.txnSeqNum, txnResult.err
   493  		}
   494  	}
   495  	return txnSeqNum, nil
   496  }