go.ligato.io/vpp-agent/v3@v3.5.0/plugins/kvscheduler/plugin_scheduler.go (about) 1 // Copyright (c) 2018 Cisco and/or its affiliates. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at: 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kvscheduler 16 17 import ( 18 "context" 19 "os" 20 "runtime/trace" 21 "sync" 22 "time" 23 24 "github.com/go-errors/errors" 25 "google.golang.org/protobuf/proto" 26 27 "go.ligato.io/cn-infra/v2/idxmap" 28 "go.ligato.io/cn-infra/v2/idxmap/mem" 29 "go.ligato.io/cn-infra/v2/infra" 30 "go.ligato.io/cn-infra/v2/rpc/rest" 31 32 kvs "go.ligato.io/vpp-agent/v3/plugins/kvscheduler/api" 33 "go.ligato.io/vpp-agent/v3/plugins/kvscheduler/internal/graph" 34 "go.ligato.io/vpp-agent/v3/plugins/kvscheduler/internal/registry" 35 "go.ligato.io/vpp-agent/v3/plugins/kvscheduler/internal/utils" 36 "go.ligato.io/vpp-agent/v3/proto/ligato/kvscheduler" 37 ) 38 39 const ( 40 // DependencyRelation identifies dependency relation for the graph. 41 DependencyRelation = "depends-on" 42 43 // DerivesRelation identifies relation of value derivation for the graph. 44 DerivesRelation = "derives" 45 46 // how often the transaction history gets trimmed to remove records too old to keep 47 txnHistoryTrimmingPeriod = 1 * time.Minute 48 49 // by default, a history of processed transaction is recorded 50 defaultRecordTransactionHistory = true 51 52 // by default, only transaction processed in the last 24 hours are kept recorded 53 // (with the exception of permanently recorded init period) 54 defaultTransactionHistoryAgeLimit = 24 * 60 // in minutes 55 56 // by default, transactions from the first hour of runtime stay permanently 57 // recorded 58 defaultPermanentlyRecordedInitPeriod = 60 // in minutes 59 60 // by default, all NB transactions and SB notifications are run without 61 // simulation (Retries are always first simulated) 62 defaultEnableTxnSimulation = false 63 64 // by default, a concise summary of every processed transactions is printed 65 // to stdout 66 defaultPrintTxnSummary = true 67 68 // name of the environment variable used to enable verification after every transaction 69 verifyModeEnv = "KVSCHED_VERIFY_MODE" 70 71 // name of the environment variable used to turn on automatic check for 72 // the preservation of the original network namespace after descriptor operations 73 checkNetNamespaceEnv = "KVSCHED_CHECK_NET_NS" 74 75 // name of the environment variable used to trigger log messages showing 76 // graph traversal 77 logGraphWalkEnv = "KVSCHED_LOG_GRAPH_WALK" 78 ) 79 80 // Scheduler is a CN-infra plugin implementing KVScheduler. 81 // Detailed documentation can be found in the "api" and "docs" sub-folders. 82 type Scheduler struct { 83 Deps 84 85 // configuration 86 config *Config 87 88 // management of go routines 89 ctx context.Context 90 cancel context.CancelFunc 91 wg sync.WaitGroup 92 93 // in-memory representation of all created+pending kv-pairs and their dependencies 94 graph graph.Graph 95 96 // registry for descriptors 97 registry registry.Registry 98 99 // a list of key prefixed covered by registered descriptors 100 keyPrefixes []string 101 102 // TXN processing 103 txnLock sync.Mutex // can be used to pause transaction processing; always lock before the graph! 104 txnQueue chan *transaction 105 txnSeqNumber uint64 106 resyncCount uint 107 108 // value status 109 updatedStates utils.KeySet // base values with updated status 110 valStateWatchers []valStateWatcher 111 112 // TXN history 113 historyLock sync.Mutex 114 txnHistory []*kvs.RecordedTxn // ordered from the oldest to the latest 115 startTime time.Time 116 117 // debugging 118 verifyMode bool 119 logGraphWalk bool 120 } 121 122 // Deps lists dependencies of the scheduler. 123 type Deps struct { 124 infra.PluginDeps 125 HTTPHandlers rest.HTTPHandlers 126 } 127 128 // Config holds the KVScheduler configuration. 129 type Config struct { 130 RecordTransactionHistory bool `json:"record-transaction-history"` 131 TransactionHistoryAgeLimit uint32 `json:"transaction-history-age-limit"` // in minutes 132 PermanentlyRecordedInitPeriod uint32 `json:"permanently-recorded-init-period"` // in minutes 133 EnableTxnSimulation bool `json:"enable-txn-simulation"` 134 PrintTxnSummary bool `json:"print-txn-summary"` 135 } 136 137 // SchedulerTxn implements transaction for the KV scheduler. 138 type SchedulerTxn struct { 139 scheduler *Scheduler 140 values map[string]proto.Message 141 } 142 143 // valStateWatcher represents one subscription for value state updates. 144 type valStateWatcher struct { 145 channel chan<- *kvscheduler.BaseValueStatus 146 selector kvs.KeySelector 147 } 148 149 // Init initializes the scheduler. Single go routine is started that will process 150 // all the transactions synchronously. 151 func (s *Scheduler) Init() error { 152 // default configuration 153 s.config = &Config{ 154 RecordTransactionHistory: defaultRecordTransactionHistory, 155 TransactionHistoryAgeLimit: defaultTransactionHistoryAgeLimit, 156 PermanentlyRecordedInitPeriod: defaultPermanentlyRecordedInitPeriod, 157 EnableTxnSimulation: defaultEnableTxnSimulation, 158 PrintTxnSummary: defaultPrintTxnSummary, 159 } 160 161 // load configuration 162 err := s.loadConfig(s.config) 163 if err != nil { 164 s.Log.Error(err) 165 return err 166 } 167 s.Log.Debugf("KVScheduler configuration: %+v", *s.config) 168 169 // prepare context for all go routines 170 s.ctx, s.cancel = context.WithCancel(context.Background()) 171 // initialize graph for in-memory storage of key-value pairs 172 graphOpts := graph.Opts{ 173 RecordOldRevs: s.config.RecordTransactionHistory, 174 RecordAgeLimit: s.config.TransactionHistoryAgeLimit, 175 PermanentInitPeriod: s.config.PermanentlyRecordedInitPeriod, 176 MethodTracker: trackGraphMethod, 177 } 178 s.graph = graph.NewGraph(graphOpts) 179 // initialize registry for key->descriptor lookups 180 s.registry = registry.NewRegistry() 181 // prepare channel for serializing transactions 182 s.txnQueue = make(chan *transaction, 100) 183 reportQueueCap(cap(s.txnQueue)) 184 // register REST API handlers 185 s.registerHandlers(s.HTTPHandlers) 186 // initialize key-set used to mark values with updated status 187 s.updatedStates = utils.NewSliceBasedKeySet() 188 // record startup time 189 s.startTime = time.Now() 190 191 // enable or disable debugging mode 192 s.verifyMode = os.Getenv(verifyModeEnv) != "" 193 s.logGraphWalk = os.Getenv(logGraphWalkEnv) != "" 194 195 // go routine processing serialized transactions 196 s.wg.Add(1) 197 go s.consumeTransactions() 198 199 // go routine periodically removing transaction records too old to keep 200 if s.config.RecordTransactionHistory { 201 s.wg.Add(1) 202 go s.transactionHistoryTrimming() 203 } 204 return nil 205 } 206 207 // loadConfig loads configuration file. 208 func (s *Scheduler) loadConfig(config *Config) error { 209 found, err := s.Cfg.LoadValue(config) 210 if err != nil { 211 return err 212 } else if !found { 213 s.Log.Debugf("%v config not found", s.PluginName) 214 return nil 215 } 216 s.Log.Debugf("%v config found: %+v", s.PluginName, config) 217 return err 218 } 219 220 // Close stops all the go routines. 221 func (s *Scheduler) Close() error { 222 s.cancel() 223 s.wg.Wait() 224 return nil 225 } 226 227 // RegisterKVDescriptor registers descriptor(s) for a set of selected 228 // keys. It should be called in the Init phase of agent plugins. 229 // Every key-value pair must have at most one descriptor associated with it 230 // (none for derived values expressing properties). 231 func (s *Scheduler) RegisterKVDescriptor(descriptors ...*kvs.KVDescriptor) error { 232 for _, d := range descriptors { 233 err := s.registerKVDescriptor(d) 234 if err != nil { 235 return err 236 } 237 } 238 return nil 239 } 240 241 func (s *Scheduler) registerKVDescriptor(descriptor *kvs.KVDescriptor) error { 242 // TODO: validate descriptor 243 if s.registry.GetDescriptor(descriptor.Name) != nil { 244 return kvs.ErrDescriptorExists 245 } 246 247 stats.addDescriptor(descriptor.Name) 248 249 s.registry.RegisterDescriptor(descriptor) 250 if descriptor.NBKeyPrefix != "" { 251 s.keyPrefixes = append(s.keyPrefixes, descriptor.NBKeyPrefix) 252 } 253 254 if descriptor.WithMetadata { 255 var metadataMap idxmap.NamedMappingRW 256 if descriptor.MetadataMapFactory != nil { 257 metadataMap = descriptor.MetadataMapFactory() 258 } else { 259 metadataMap = mem.NewNamedMapping(s.Log, descriptor.Name, nil) 260 } 261 graphW := s.graph.Write(true, false) 262 graphW.RegisterMetadataMap(descriptor.Name, metadataMap) 263 graphW.Release() 264 } 265 return nil 266 } 267 268 // GetRegisteredNBKeyPrefixes returns a list of key prefixes from NB with values 269 // described by registered descriptors and therefore managed by the scheduler. 270 func (s *Scheduler) GetRegisteredNBKeyPrefixes() []string { 271 return s.keyPrefixes 272 } 273 274 // StartNBTransaction starts a new transaction from NB to SB plane. 275 // The enqueued actions are scheduled for execution by Txn.Commit(). 276 func (s *Scheduler) StartNBTransaction() kvs.Txn { 277 txn := &SchedulerTxn{ 278 scheduler: s, 279 values: make(map[string]proto.Message), 280 } 281 return txn 282 } 283 284 // TransactionBarrier ensures that all notifications received prior to the call 285 // are associated with transactions that have already finalized. 286 func (s *Scheduler) TransactionBarrier() { 287 s.txnLock.Lock() 288 s.txnLock.Unlock() 289 } 290 291 // PushSBNotification notifies about a spontaneous value change(s) in the SB 292 // plane (i.e. not triggered by NB transaction). 293 func (s *Scheduler) PushSBNotification(notif ...kvs.KVWithMetadata) error { 294 txn := &transaction{ 295 txnType: kvs.SBNotification, 296 created: time.Now(), 297 } 298 for _, value := range notif { 299 txn.values = append(txn.values, kvForTxn{ 300 key: value.Key, 301 value: value.Value, 302 metadata: value.Metadata, 303 origin: kvs.FromSB, 304 }) 305 } 306 return s.enqueueTxn(txn) 307 } 308 309 // GetMetadataMap returns (read-only) map associating value label with value 310 // metadata of a given descriptor. 311 // Returns nil if the descriptor does not expose metadata. 312 func (s *Scheduler) GetMetadataMap(descriptor string) idxmap.NamedMapping { 313 graphR := s.graph.Read() 314 defer graphR.Release() 315 316 return graphR.GetMetadataMap(descriptor) 317 } 318 319 // GetValueStatus returns the status of a non-derived value with the given 320 // key. 321 func (s *Scheduler) GetValueStatus(key string) *kvscheduler.BaseValueStatus { 322 graphR := s.graph.Read() 323 defer graphR.Release() 324 return getValueStatus(graphR.GetNode(key), key) 325 } 326 327 // WatchValueStatus allows to watch for changes in the status of non-derived 328 // values with keys selected by the selector (all if keySelector==nil). 329 func (s *Scheduler) WatchValueStatus(channel chan<- *kvscheduler.BaseValueStatus, keySelector kvs.KeySelector) { 330 s.txnLock.Lock() 331 defer s.txnLock.Unlock() 332 s.valStateWatchers = append(s.valStateWatchers, valStateWatcher{ 333 channel: channel, 334 selector: keySelector, 335 }) 336 } 337 338 // DumpValuesByDescriptor dumps values associated with the given 339 // descriptor as viewed from either NB (what was requested to be applied), 340 // SB (what is actually applied) or from the inside (what kvscheduler's 341 // cached view of SB is). 342 func (s *Scheduler) DumpValuesByDescriptor(descriptor string, view kvs.View) (values []kvs.KVWithMetadata, err error) { 343 if view == kvs.SBView { 344 // pause transaction processing 345 s.txnLock.Lock() 346 defer s.txnLock.Unlock() 347 } 348 349 graphR := s.graph.Read() 350 defer graphR.Release() 351 352 if view == kvs.NBView { 353 // return the intended state 354 var kvPairs []kvs.KVWithMetadata 355 nbNodes := graphR.GetNodes(nil, 356 graph.WithFlags(&DescriptorFlag{descriptor}), 357 graph.WithoutFlags(&DerivedFlag{}, &ValueStateFlag{kvscheduler.ValueState_OBTAINED})) 358 359 for _, node := range nbNodes { 360 lastUpdate := getNodeLastUpdate(node) 361 if lastUpdate == nil || lastUpdate.value == nil { 362 // filter found NB values and values requested to be deleted 363 continue 364 } 365 kvPairs = append(kvPairs, kvs.KVWithMetadata{ 366 Key: node.GetKey(), 367 Value: lastUpdate.value, 368 Origin: kvs.FromNB, 369 Metadata: node.GetMetadata(), 370 }) 371 } 372 return kvPairs, nil 373 } 374 375 /* Cached/SB: */ 376 377 // retrieve from the in-memory graph first (for Retrieve it is used for correlation) 378 inMemNodes := nodesToKVPairsWithMetadata( 379 graphR.GetNodes(nil, descrValsSelectors(descriptor, true)...)) 380 381 if view == kvs.CachedView { 382 // return the scheduler's view of SB for the given descriptor 383 return inMemNodes, nil 384 } 385 386 // obtain Retrieve handler from the descriptor 387 kvDescriptor := s.registry.GetDescriptor(descriptor) 388 if kvDescriptor == nil { 389 err = errors.New("descriptor is not registered") 390 return 391 } 392 if kvDescriptor.Retrieve == nil { 393 err = errors.New("descriptor does not support Retrieve operation") 394 return 395 } 396 397 // retrieve the state directly from SB via descriptor 398 values, err = kvDescriptor.Retrieve(inMemNodes) 399 return 400 } 401 402 func (s *Scheduler) getDescriptorForKeyPrefix(keyPrefix string) string { 403 var descriptorName string 404 s.txnLock.Lock() 405 for _, descriptor := range s.registry.GetAllDescriptors() { 406 if descriptor.NBKeyPrefix == keyPrefix { 407 descriptorName = descriptor.Name 408 } 409 } 410 s.txnLock.Unlock() 411 return descriptorName 412 } 413 414 // DumpValuesByKeyPrefix like DumpValuesByDescriptor returns a dump of values, 415 // but the descriptor is selected based on the key prefix. 416 func (s *Scheduler) DumpValuesByKeyPrefix(keyPrefix string, view kvs.View) (values []kvs.KVWithMetadata, err error) { 417 descriptorName := s.getDescriptorForKeyPrefix(keyPrefix) 418 if descriptorName == "" { 419 err = errors.New("no descriptor found matching the key prefix") 420 return 421 } 422 return s.DumpValuesByDescriptor(descriptorName, view) 423 } 424 425 // SetValue changes (non-derived) value. 426 // If <value> is nil, the value will get deleted. 427 func (txn *SchedulerTxn) SetValue(key string, value proto.Message) kvs.Txn { 428 txn.values[key] = value 429 return txn 430 } 431 432 // Commit orders scheduler to execute enqueued operations. 433 // Operations with unmet dependencies will get postponed and possibly 434 // executed later. 435 func (txn *SchedulerTxn) Commit(ctx context.Context) (txnSeqNum uint64, err error) { 436 ctx, task := trace.NewTask(ctx, "scheduler.Commit") 437 defer task.End() 438 439 txnSeqNum = ^uint64(0) 440 441 txnData := &transaction{ 442 ctx: ctx, 443 txnType: kvs.NBTransaction, 444 nb: &nbTxn{}, 445 values: make([]kvForTxn, 0, len(txn.values)), 446 created: time.Now(), 447 } 448 449 // collect values 450 for key, value := range txn.values { 451 txnData.values = append(txnData.values, kvForTxn{ 452 key: key, 453 value: value, 454 origin: kvs.FromNB, 455 }) 456 } 457 458 // parse transaction options 459 txnData.nb.isBlocking = !kvs.IsNonBlockingTxn(ctx) 460 txnData.nb.resyncType, txnData.nb.verboseRefresh = kvs.IsResync(ctx) 461 txnData.nb.retryArgs, txnData.nb.retryEnabled = kvs.IsWithRetry(ctx) 462 txnData.nb.revertOnFailure = kvs.IsWithRevert(ctx) 463 txnData.nb.description, _ = kvs.IsWithDescription(ctx) 464 txnData.nb.withSimulation = txn.scheduler.config.EnableTxnSimulation || kvs.IsWithSimulation(ctx) 465 466 // validate transaction options 467 if txnData.nb.resyncType == kvs.DownstreamResync && len(txnData.values) > 0 { 468 return txnSeqNum, kvs.NewTransactionError(kvs.ErrCombinedDownstreamResyncWithChange, nil) 469 } 470 if txnData.nb.revertOnFailure && txnData.nb.resyncType != kvs.NotResync { 471 return txnSeqNum, kvs.NewTransactionError(kvs.ErrRevertNotSupportedWithResync, nil) 472 } 473 474 // enqueue txn and for blocking Commit wait for the errors 475 if txnData.nb.isBlocking { 476 txnData.nb.resultChan = make(chan txnResult, 1) 477 } 478 479 err = txn.scheduler.enqueueTxn(txnData) 480 if err != nil { 481 return txnSeqNum, kvs.NewTransactionError(err, nil) 482 } 483 if txnData.nb.isBlocking { 484 select { 485 case <-txn.scheduler.ctx.Done(): 486 return txnSeqNum, kvs.NewTransactionError(kvs.ErrClosedScheduler, nil) 487 case <-ctx.Done(): 488 return txnSeqNum, kvs.NewTransactionError(kvs.ErrTxnWaitCanceled, nil) 489 case txnResult := <-txnData.nb.resultChan: 490 close(txnData.nb.resultChan) 491 trace.Logf(ctx, "txnSeqNum", "%d", txnResult.txnSeqNum) 492 return txnResult.txnSeqNum, txnResult.err 493 } 494 } 495 return txnSeqNum, nil 496 }