istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/kube/krt/collection.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package krt
    16  
    17  import (
    18  	"fmt"
    19  	"sync"
    20  
    21  	"istio.io/istio/pkg/kube/controllers"
    22  	istiolog "istio.io/istio/pkg/log"
    23  	"istio.io/istio/pkg/maps"
    24  	"istio.io/istio/pkg/ptr"
    25  	"istio.io/istio/pkg/slices"
    26  	"istio.io/istio/pkg/util/sets"
    27  )
    28  
// manyCollection builds a mapping from I->O.
// This can be built from transformation functions of I->*O or I->[]O; both are implemented by this same struct.
//
// Locking used here is somewhat complex. We use two locks, mu and recomputeMu.
//   - mu is responsible for locking the actual data we are storing. List()/GetKey() calls will lock this.
//   - recomputeMu is responsible for ensuring there is mutually exclusive access to recomputation. Typically, in a controller
//     pattern this would be accomplished by a queue. However, these add operational and performance overhead that is not required here.
//     Instead, we ensure at most one goroutine is recomputing things at a time.
//     This avoids two dependency updates happening concurrently and writing events out of order.
type manyCollection[I, O any] struct {
	// collectionName provides the collectionName for this collection.
	// It is used for the logger label, the Syncer names and name().
	collectionName string
	// id is the unique identifier of this collection, returned by uid().
	id collectionUID
	// parent is the input collection we are building off of.
	parent Collection[I]

	// log is a logger for the collection, with additional labels already added to identify it.
	log *istiolog.Scope

	// recomputeMu blocks a recomputation of I->O.
	recomputeMu sync.Mutex

	// mu protects all items grouped below.
	// This is acquired for reads and writes of data.
	// This can be acquired with recomputeMu held, but only with strict ordering (mu inside recomputeMu)
	mu sync.Mutex
	// collectionState holds the current inputs, outputs and the input->output key mapping.
	collectionState multiIndex[I, O]
	// collectionDependencies specifies the set of collections we depend on from within the transformation functions (via Fetch).
	// These are keyed by the internal uid() function on collections.
	// Note this does not include `parent`, which is the *primary* dependency declared outside of transformation functions.
	collectionDependencies sets.Set[collectionUID]
	// Stores a map of I -> secondary dependencies (added via Fetch)
	// NOTE(review): onPrimaryInputEventLocked writes this map before taking mu, and onSecondaryDependencyEvent
	// reads it with only recomputeMu held, so in practice recomputeMu is what serializes access - confirm.
	objectDependencies map[Key[I]][]*dependency

	// eventHandlers is a list of event handlers registered for the collection. On any changes, each will be notified.
	eventHandlers *handlers[O]

	// transformation is the user-provided function computing the outputs for a single input.
	transformation TransformationMulti[I, O]

	// augmentation allows transforming an object into another for usage throughout the library. See WithObjectAugmentation.
	augmentation func(a any) any
	// synced is closed once the initial state from parent has been processed.
	synced chan struct{}
	// stop is passed to the WaitUntilSynced calls so waiting can be aborted. See WithStop.
	stop <-chan struct{}
}
    72  
    73  var _ internalCollection[any] = &manyCollection[any, any]{}
    74  
// handlers is a concurrency-safe list of batch event handlers for a collection.
type handlers[O any] struct {
	// mu guards h and init.
	mu sync.RWMutex
	// h is the list of registered handlers, in registration order.
	h []func(o []Event[O], initialSync bool)
	// init records whether MarkInitialized has been called.
	init bool
}
    80  
    81  func (o *handlers[O]) MarkInitialized() []func(o []Event[O], initialSync bool) {
    82  	o.mu.Lock()
    83  	defer o.mu.Unlock()
    84  	o.init = true
    85  	return slices.Clone(o.h)
    86  }
    87  
    88  func (o *handlers[O]) Insert(f func(o []Event[O], initialSync bool)) bool {
    89  	o.mu.Lock()
    90  	defer o.mu.Unlock()
    91  	o.h = append(o.h, f)
    92  	return !o.init
    93  }
    94  
    95  func (o *handlers[O]) Get() []func(o []Event[O], initialSync bool) {
    96  	o.mu.RLock()
    97  	defer o.mu.RUnlock()
    98  	return slices.Clone(o.h)
    99  }
   100  
// multiIndex stores input and output objects.
// Each input and output can be looked up by its key.
// Additionally, a mapping of input key -> output keys stores the transformation.
type multiIndex[I, O any] struct {
	// outputs holds every computed output object by its key.
	outputs map[Key[O]]O
	// inputs holds the last processed input object by its key.
	inputs map[Key[I]]I
	// mappings records, per input key, the set of output keys it produced.
	mappings map[Key[I]]sets.Set[Key[O]]
}
   109  
   110  func (h *manyCollection[I, O]) Synced() Syncer {
   111  	return channelSyncer{
   112  		name:   h.collectionName,
   113  		synced: h.synced,
   114  	}
   115  }
   116  
   117  // nolint: unused // (not true, its to implement an interface)
   118  func (h *manyCollection[I, O]) dump() {
   119  	h.recomputeMu.Lock()
   120  	defer h.recomputeMu.Unlock()
   121  	h.mu.Lock()
   122  	defer h.mu.Unlock()
   123  	h.log.Errorf(">>> BEGIN DUMP")
   124  	for k, deps := range h.objectDependencies {
   125  		for _, dep := range deps {
   126  			h.log.Errorf("Dependencies for: %v: %v (%v)", k, dep.collectionName, dep.filter)
   127  		}
   128  	}
   129  	for i, os := range h.collectionState.mappings {
   130  		h.log.Errorf("Input %v -> %v", i, os.UnsortedList())
   131  	}
   132  	for os, o := range h.collectionState.outputs {
   133  		h.log.Errorf("Output %v -> %v", os, o)
   134  	}
   135  	h.log.Errorf("<<< END DUMP")
   136  }
   137  
   138  // nolint: unused // (not true, its to implement an interface)
   139  func (h *manyCollection[I, O]) augment(a any) any {
   140  	if h.augmentation != nil {
   141  		return h.augmentation(a)
   142  	}
   143  	return a
   144  }
   145  
   146  // onPrimaryInputEvent takes a list of I's that changed and reruns the handler over them.
   147  // This is called either when I directly changes, or if a secondary dependency changed. In this case, we compute which I's depended
   148  // on the secondary dependency, and call onPrimaryInputEvent with them
   149  func (h *manyCollection[I, O]) onPrimaryInputEvent(items []Event[I], lock bool) {
   150  	if lock {
   151  		h.recomputeMu.Lock()
   152  		defer h.recomputeMu.Unlock()
   153  	}
   154  	// Between the events being enqueued and now, the input may have changed. Update with latest info.
   155  	// Note we now have the recomputeMu so this is safe; any futures calls will do the same so always have up-to-date information.
   156  	for idx, ev := range items {
   157  		iKey := GetKey(ev.Latest())
   158  		iObj := h.parent.GetKey(iKey)
   159  		if iObj == nil {
   160  			ev.Event = controllers.EventDelete
   161  			if ev.Old == nil {
   162  				// This was an add, now its a Delete. Make sure we don't have Old and New nil, which we claim to be illegal
   163  				ev.Old = ev.New
   164  			}
   165  			ev.New = nil
   166  		} else {
   167  			ev.New = iObj
   168  		}
   169  		items[idx] = ev
   170  	}
   171  	h.onPrimaryInputEventLocked(items)
   172  }
   173  
   174  // onPrimaryInputEventLocked takes a list of I's that changed and reruns the handler over them.
   175  // This should be called with recomputeMu acquired.
   176  func (h *manyCollection[I, O]) onPrimaryInputEventLocked(items []Event[I]) {
   177  	var events []Event[O]
   178  	recomputedResults := make([]map[Key[O]]O, len(items))
   179  	for idx, a := range items {
   180  		if a.Event == controllers.EventDelete {
   181  			// handled below, with full lock...
   182  			continue
   183  		}
   184  		i := a.Latest()
   185  		iKey := GetKey(i)
   186  
   187  		ctx := &collectionDependencyTracker[I, O]{h, nil, iKey}
   188  		results := slices.GroupUnique(h.transformation(ctx, i), GetKey[O])
   189  		recomputedResults[idx] = results
   190  		// Update the I -> Dependency mapping
   191  		h.objectDependencies[iKey] = ctx.d
   192  	}
   193  
   194  	// Now acquire the full lock. Note we still have recomputeMu held!
   195  	h.mu.Lock()
   196  	for idx, a := range items {
   197  		i := a.Latest()
   198  		iKey := GetKey(i)
   199  		if a.Event == controllers.EventDelete {
   200  			for oKey := range h.collectionState.mappings[iKey] {
   201  				oldRes, f := h.collectionState.outputs[oKey]
   202  				if !f {
   203  					h.log.WithLabels("oKey", oKey).Errorf("invalid event, deletion of non-existent object")
   204  					continue
   205  				}
   206  				e := Event[O]{
   207  					Event: controllers.EventDelete,
   208  					Old:   &oldRes,
   209  				}
   210  				events = append(events, e)
   211  				delete(h.collectionState.outputs, oKey)
   212  				if h.log.DebugEnabled() {
   213  					h.log.WithLabels("res", oKey).Debugf("handled delete")
   214  				}
   215  			}
   216  			delete(h.collectionState.mappings, iKey)
   217  			delete(h.collectionState.inputs, iKey)
   218  			delete(h.objectDependencies, iKey)
   219  		} else {
   220  			results := recomputedResults[idx]
   221  			newKeys := sets.New(maps.Keys(results)...)
   222  			oldKeys := h.collectionState.mappings[iKey]
   223  			h.collectionState.mappings[iKey] = newKeys
   224  			h.collectionState.inputs[iKey] = i
   225  			allKeys := newKeys.Copy().Merge(oldKeys)
   226  			// We have now built up a set of I -> []O
   227  			// and found the previous I -> []O mapping
   228  			for key := range allKeys {
   229  				// Find new O object
   230  				newRes, newExists := results[key]
   231  				// Find the old O object
   232  				oldRes, oldExists := h.collectionState.outputs[key]
   233  				e := Event[O]{}
   234  				if newExists && oldExists {
   235  					if equal(newRes, oldRes) {
   236  						// NOP change, skip
   237  						continue
   238  					}
   239  					e.Event = controllers.EventUpdate
   240  					e.New = &newRes
   241  					e.Old = &oldRes
   242  					h.collectionState.outputs[key] = newRes
   243  				} else if newExists {
   244  					e.Event = controllers.EventAdd
   245  					e.New = &newRes
   246  					h.collectionState.outputs[key] = newRes
   247  				} else {
   248  					e.Event = controllers.EventDelete
   249  					e.Old = &oldRes
   250  					delete(h.collectionState.outputs, key)
   251  				}
   252  
   253  				if h.log.DebugEnabled() {
   254  					h.log.WithLabels("res", key, "type", e.Event).Debugf("handled")
   255  				}
   256  				events = append(events, e)
   257  			}
   258  		}
   259  	}
   260  	h.mu.Unlock()
   261  
   262  	// Short circuit if we have nothing to do
   263  	if len(events) == 0 {
   264  		return
   265  	}
   266  	handlers := h.eventHandlers.Get()
   267  
   268  	if h.log.DebugEnabled() {
   269  		h.log.WithLabels("events", len(events), "handlers", len(handlers)).Debugf("calling handlers")
   270  	}
   271  	for _, handler := range handlers {
   272  		handler(slices.Clone(events), false)
   273  	}
   274  }
   275  
   276  // WithName allows explicitly naming a controller. This is a best practice to make debugging easier.
   277  // If not set, a default name is picked.
   278  func WithName(name string) CollectionOption {
   279  	return func(c *collectionOptions) {
   280  		c.name = name
   281  	}
   282  }
   283  
   284  // WithObjectAugmentation allows transforming an object into another for usage throughout the library.
   285  // Currently this applies to things like Name, Namespace, Labels, LabelSelector, etc. Equals is not currently supported,
   286  // but likely in the future.
   287  // The intended usage is to add support for these fields to collections of types that do not implement the appropriate interfaces.
   288  // The conversion function can convert to a embedded struct with extra methods added:
   289  //
   290  //	type Wrapper struct { Object }
   291  //	func (w Wrapper) ResourceName() string { return ... }
   292  //	WithObjectAugmentation(func(o any) any { return Wrapper{o.(Object)} })
   293  func WithObjectAugmentation(fn func(o any) any) CollectionOption {
   294  	return func(c *collectionOptions) {
   295  		c.augmentation = fn
   296  	}
   297  }
   298  
   299  // WithStop sets a custom stop channel so a collection can be terminated when the channel is closed
   300  func WithStop(stop <-chan struct{}) CollectionOption {
   301  	return func(c *collectionOptions) {
   302  		c.stop = stop
   303  	}
   304  }
   305  
   306  // NewCollection transforms a Collection[I] to a Collection[O] by applying the provided transformation function.
   307  // This applies for one-to-one relationships between I and O.
   308  // For zero-to-one, use NewSingleton. For one-to-many, use NewManyCollection.
   309  func NewCollection[I, O any](c Collection[I], hf TransformationSingle[I, O], opts ...CollectionOption) Collection[O] {
   310  	// For implementation simplicity, represent TransformationSingle as a TransformationMulti so we can share an implementation.
   311  	hm := func(ctx HandlerContext, i I) []O {
   312  		res := hf(ctx, i)
   313  		if res == nil {
   314  			return nil
   315  		}
   316  		return []O{*res}
   317  	}
   318  	o := buildCollectionOptions(opts...)
   319  	if o.name == "" {
   320  		o.name = fmt.Sprintf("Collection[%v,%v]", ptr.TypeName[I](), ptr.TypeName[O]())
   321  	}
   322  	return newManyCollection[I, O](c, hm, o)
   323  }
   324  
   325  // NewManyCollection transforms a Collection[I] to a Collection[O] by applying the provided transformation function.
   326  // This applies for one-to-many relationships between I and O.
   327  // For zero-to-one, use NewSingleton. For one-to-one, use NewCollection.
   328  func NewManyCollection[I, O any](c Collection[I], hf TransformationMulti[I, O], opts ...CollectionOption) Collection[O] {
   329  	o := buildCollectionOptions(opts...)
   330  	if o.name == "" {
   331  		o.name = fmt.Sprintf("ManyCollection[%v,%v]", ptr.TypeName[I](), ptr.TypeName[O]())
   332  	}
   333  	return newManyCollection[I, O](c, hf, o)
   334  }
   335  
   336  func newManyCollection[I, O any](cc Collection[I], hf TransformationMulti[I, O], opts collectionOptions) Collection[O] {
   337  	c := cc.(internalCollection[I])
   338  	h := &manyCollection[I, O]{
   339  		transformation:         hf,
   340  		collectionName:         opts.name,
   341  		id:                     nextUID(),
   342  		log:                    log.WithLabels("owner", opts.name),
   343  		parent:                 c,
   344  		collectionDependencies: sets.New[collectionUID](),
   345  		objectDependencies:     map[Key[I]][]*dependency{},
   346  		collectionState: multiIndex[I, O]{
   347  			inputs:   map[Key[I]]I{},
   348  			outputs:  map[Key[O]]O{},
   349  			mappings: map[Key[I]]sets.Set[Key[O]]{},
   350  		},
   351  		eventHandlers: &handlers[O]{},
   352  		augmentation:  opts.augmentation,
   353  		synced:        make(chan struct{}),
   354  		stop:          opts.stop,
   355  	}
   356  	go func() {
   357  		// Wait for primary dependency to be ready
   358  		if !c.Synced().WaitUntilSynced(h.stop) {
   359  			return
   360  		}
   361  		// Now, register our handler. This will call Add() for the initial state
   362  		// Locking here is tricky. We want to make sure we don't get duplicate events.
   363  		// When we run RegisterBatch, it will trigger events for the initial state. However, other events could trigger
   364  		// while we are processing these.
   365  		// By holding the lock, we ensure we have exclusive access during this time.
   366  		h.recomputeMu.Lock()
   367  		h.eventHandlers.MarkInitialized()
   368  		handlerReg := c.RegisterBatch(func(events []Event[I], initialSync bool) {
   369  			if log.DebugEnabled() {
   370  				h.log.WithLabels("dep", "primary", "batch", len(events)).
   371  					Debugf("got event")
   372  			}
   373  			// Lock after the initial sync only
   374  			// For initial sync we explicitly hold the lock ourselves to ensure we have a broad enough critical section.
   375  			lock := !initialSync
   376  			h.onPrimaryInputEvent(events, lock)
   377  		}, true)
   378  		if !handlerReg.WaitUntilSynced(h.stop) {
   379  			h.recomputeMu.Unlock()
   380  			return
   381  		}
   382  		h.recomputeMu.Unlock()
   383  		close(h.synced)
   384  		h.log.Infof("%v synced", h.name())
   385  	}()
   386  	return h
   387  }
   388  
   389  // Handler is called when a dependency changes. We will take as inputs the item that changed.
   390  // Then we find all of our own values (I) that changed and onPrimaryInputEvent() them
   391  func (h *manyCollection[I, O]) onSecondaryDependencyEvent(sourceCollection collectionUID, events []Event[any]) {
   392  	h.recomputeMu.Lock()
   393  	defer h.recomputeMu.Unlock()
   394  	// A secondary dependency changed...
   395  	// Got an event. Now we need to find out who depends on it..
   396  	changedInputKeys := sets.Set[Key[I]]{}
   397  	// Check old and new
   398  	for _, ev := range events {
   399  		// We have a possibly dependant object changed. For each input object, see if it depends on the object.
   400  		// This can be by name or the entire type.
   401  		// objectRelations stores each input key to dependency specification.
   402  		for iKey, dependencies := range h.objectDependencies {
   403  			if changed := h.objectChanged(iKey, dependencies, sourceCollection, ev); changed {
   404  				changedInputKeys.Insert(iKey)
   405  			}
   406  		}
   407  	}
   408  	h.log.Debugf("event size %v, impacts %v objects", len(events), len(changedInputKeys))
   409  
   410  	toRun := make([]Event[I], 0, len(changedInputKeys))
   411  	// Now we have the set of input keys that changed. We need to recompute all of these.
   412  	// While we could just do that manually, to re-use code, we will convert these into Event[I] and use the same logic as
   413  	// we would if the input itself changed.
   414  	for i := range changedInputKeys {
   415  		iObj := h.parent.GetKey(i)
   416  		if iObj == nil {
   417  			// Object no longer found means it has been deleted.
   418  			h.log.Debugf("parent deletion %v", i)
   419  			for oKey := range h.collectionState.mappings[i] {
   420  				_, f := h.collectionState.outputs[oKey]
   421  				if !f {
   422  					// Typically happens when O has multiple parents
   423  					log.WithLabels("iKey", i, "oKey", oKey).Errorf("BUG, inconsistent")
   424  					continue
   425  				}
   426  				e := Event[I]{
   427  					Event: controllers.EventDelete,
   428  					Old:   ptr.Of(h.collectionState.inputs[i]),
   429  				}
   430  				toRun = append(toRun, e)
   431  			}
   432  		} else {
   433  			// Typically an EventUpdate should have Old and New. We only have New here.
   434  			// In practice, this is an internal surface only so we just make sure onPrimaryInputEvent handles this.
   435  			toRun = append(toRun, Event[I]{
   436  				Event: controllers.EventUpdate,
   437  				New:   iObj,
   438  			})
   439  		}
   440  	}
   441  	h.onPrimaryInputEventLocked(toRun)
   442  }
   443  
   444  func (h *manyCollection[I, O]) objectChanged(iKey Key[I], dependencies []*dependency, sourceCollection collectionUID, ev Event[any]) bool {
   445  	for _, dep := range dependencies {
   446  		id := dep.id
   447  		if id != sourceCollection {
   448  			continue
   449  		}
   450  		// For each input, we will check if it depends on this event.
   451  		// We use Items() to check both the old and new object; we will recompute if either matched
   452  		for _, item := range ev.Items() {
   453  			match := dep.filter.Matches(item, false)
   454  			if h.log.DebugEnabled() {
   455  				h.log.WithLabels("item", iKey, "match", match).Debugf("dependency change for collection %T", sourceCollection)
   456  			}
   457  			if match {
   458  				// Its a match! Return now. We don't need to check all dependencies, since we just need to find if any of them changed
   459  				return true
   460  			}
   461  		}
   462  	}
   463  	return false
   464  }
   465  
// _internalHandler is an intentionally empty marker method.
// NOTE(review): presumably required by an internal interface declared elsewhere - confirm.
func (h *manyCollection[I, O]) _internalHandler() {
}
   468  
   469  func (h *manyCollection[I, O]) GetKey(k Key[O]) (res *O) {
   470  	h.mu.Lock()
   471  	defer h.mu.Unlock()
   472  	rf, f := h.collectionState.outputs[k]
   473  	if f {
   474  		return &rf
   475  	}
   476  	return nil
   477  }
   478  
   479  func (h *manyCollection[I, O]) List() (res []O) {
   480  	h.mu.Lock()
   481  	defer h.mu.Unlock()
   482  	return maps.Values(h.collectionState.outputs)
   483  }
   484  
// Register adds a handler that receives one event at a time.
// It delegates to registerHandlerAsBatched and returns the Syncer that call produces.
func (h *manyCollection[I, O]) Register(f func(o Event[O])) Syncer {
	return registerHandlerAsBatched[O](h, f)
}
   488  
   489  func (h *manyCollection[I, O]) RegisterBatch(f func(o []Event[O], initialSync bool), runExistingState bool) Syncer {
   490  	if runExistingState {
   491  		h.recomputeMu.Lock()
   492  		defer h.recomputeMu.Unlock()
   493  	}
   494  	initialized := !h.eventHandlers.Insert(f)
   495  	if initialized && runExistingState {
   496  		// Already started. Pause everything, and run through the handler.
   497  		h.mu.Lock()
   498  		events := make([]Event[O], 0, len(h.collectionState.outputs))
   499  		for _, o := range h.collectionState.outputs {
   500  			o := o
   501  			events = append(events, Event[O]{
   502  				New:   &o,
   503  				Event: controllers.EventAdd,
   504  			})
   505  		}
   506  		h.mu.Unlock()
   507  		if len(events) > 0 {
   508  			if log.DebugEnabled() {
   509  				h.log.WithLabels("items", len(events)).Debugf("call handler with initial state")
   510  			}
   511  			f(events, true)
   512  		}
   513  		// We handle events in sequence here, so its always synced at this point/
   514  		return alwaysSynced{}
   515  	}
   516  	return channelSyncer{
   517  		name:   h.collectionName + " handler",
   518  		synced: h.synced,
   519  	}
   520  }
   521  
// name returns the collection's name.
func (h *manyCollection[I, O]) name() string {
	return h.collectionName
}
   525  
// uid returns the collection's unique identifier.
// nolint: unused // (not true, its to implement an interface)
func (h *manyCollection[I, O]) uid() collectionUID {
	return h.id
}
   530  
// collectionDependencyTracker tracks, for a single transformation call, all dependencies registered.
// These are inserted on each call to Fetch().
// Once the transformation function is complete, the set of dependencies for the provided input will be replaced
// with the set accumulated here.
//
// Note: this is used instead of passing manyCollection to the transformation function directly because we want to build up some state
// for a given transformation call at once, then apply it in a single transaction to the manyCollection.
type collectionDependencyTracker[I, O any] struct {
	// The embedded collection this tracker belongs to.
	*manyCollection[I, O]
	// d accumulates the dependencies registered during the transformation call.
	d []*dependency
	// key is the key of the input the transformation is running for.
	key Key[I]
}
   543  
// name returns the collection name followed by the tracked input key in braces.
func (i *collectionDependencyTracker[I, O]) name() string {
	return fmt.Sprintf("%s{%s}", i.collectionName, i.key)
}
   547  
   548  // registerDependency track a dependency. This is in the context of a specific input I type, as we create a collectionDependencyTracker
   549  // per I.
   550  func (i *collectionDependencyTracker[I, O]) registerDependency(
   551  	d *dependency,
   552  	syncer Syncer,
   553  	register func(f erasedEventHandler),
   554  ) {
   555  	i.d = append(i.d, d)
   556  
   557  	// For any new collections we depend on, start watching them if its the first time we have watched them.
   558  	if !i.collectionDependencies.InsertContains(d.id) {
   559  		i.log.WithLabels("collection", d.collectionName).Debugf("register new dependency")
   560  		syncer.WaitUntilSynced(i.stop)
   561  		register(func(o []Event[any], initialSync bool) {
   562  			i.onSecondaryDependencyEvent(d.id, o)
   563  		})
   564  	}
   565  }
   566  
// _internalHandler is an intentionally empty marker method.
// NOTE(review): presumably required by an internal interface declared elsewhere - confirm.
func (i *collectionDependencyTracker[I, O]) _internalHandler() {
}