github.com/elfadel/cilium@v1.6.12/pkg/datapath/ipcache/listener.go (about)

     1  // Copyright 2016-2019 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package ipcache
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"net"
    21  	"os"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/cilium/cilium/pkg/bpf"
    26  	"github.com/cilium/cilium/pkg/controller"
    27  	"github.com/cilium/cilium/pkg/identity"
    28  	"github.com/cilium/cilium/pkg/ipcache"
    29  	"github.com/cilium/cilium/pkg/logging"
    30  	"github.com/cilium/cilium/pkg/logging/logfields"
    31  	ipcacheMap "github.com/cilium/cilium/pkg/maps/ipcache"
    32  	"github.com/cilium/cilium/pkg/node"
    33  	"github.com/cilium/cilium/pkg/option"
    34  	"github.com/cilium/cilium/pkg/source"
    35  
    36  	"github.com/sirupsen/logrus"
    37  )
    38  
// log is the package-level logger; it is derived from the default logger with
// the subsystem field set to "datapath-ipcache".
var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "datapath-ipcache")
    40  
// datapath is an interface to the datapath implementation, used to apply
// changes that are made within this module.
type datapath interface {
	// TriggerReloadWithoutCompile requests a datapath reload; 'reason' is a
	// free-form description of what triggered it. Within this file the
	// returned WaitGroup, when non-nil, is waited on by the GC controller.
	// NOTE(review): presumably the WaitGroup completes once the reload has
	// finished — confirm against the implementation.
	TriggerReloadWithoutCompile(reason string) (*sync.WaitGroup, error)
}
    46  
// BPFListener implements the ipcache.IPIdentityMappingBPFListener
// interface with an IPCache store that is backed by BPF maps.
//
// One listener is shared between callers of OnIPIdentityCacheChange() and the
// controller launched from OnIPIdentityCacheGC(). However, the listener is not
// updated after initialization, so no locking is provided for access.
type BPFListener struct {
	// bpfMap is the BPF map that this listener will update when events are
	// received from the IPCache.
	bpfMap *ipcacheMap.Map

	// datapath allows this listener to trigger BPF program regeneration.
	datapath datapath
}
    61  
    62  func newListener(m *ipcacheMap.Map, d datapath) *BPFListener {
    63  	return &BPFListener{
    64  		bpfMap:   m,
    65  		datapath: d,
    66  	}
    67  }
    68  
    69  // NewListener returns a new listener to push IPCache entries into BPF maps.
    70  func NewListener(d datapath) *BPFListener {
    71  	return newListener(ipcacheMap.IPCache, d)
    72  }
    73  
    74  // OnIPIdentityCacheChange is called whenever there is a change of state in the
    75  // IPCache (pkg/ipcache).
    76  // TODO (FIXME): GH-3161.
    77  //
    78  // 'oldIPIDPair' is ignored here, because in the BPF maps an update for the
    79  // IP->ID mapping will replace any existing contents; knowledge of the old pair
    80  // is not required to upsert the new pair.
    81  func (l *BPFListener) OnIPIdentityCacheChange(modType ipcache.CacheModification, cidr net.IPNet,
    82  	oldHostIP, newHostIP net.IP, oldID *identity.NumericIdentity, newID identity.NumericIdentity, encryptKey uint8) {
    83  
    84  	scopedLog := log
    85  	if option.Config.Debug {
    86  		scopedLog = log.WithFields(logrus.Fields{
    87  			logfields.IPAddr:       cidr,
    88  			logfields.Identity:     newID,
    89  			logfields.Modification: modType,
    90  		})
    91  	}
    92  
    93  	scopedLog.Debug("Daemon notified of IP-Identity cache state change")
    94  
    95  	// TODO - see if we can factor this into an interface under something like
    96  	// pkg/datapath instead of in the daemon directly so that the code is more
    97  	// logically located.
    98  
    99  	// Update BPF Maps.
   100  
   101  	key := ipcacheMap.NewKey(cidr.IP, cidr.Mask)
   102  
   103  	switch modType {
   104  	case ipcache.Upsert:
   105  		value := ipcacheMap.RemoteEndpointInfo{
   106  			SecurityIdentity: uint32(newID),
   107  			Key:              encryptKey,
   108  		}
   109  
   110  		if newHostIP != nil {
   111  			// If the hostIP is specified and it doesn't point to
   112  			// the local host, then the ipcache should be populated
   113  			// with the hostIP so that this traffic can be guided
   114  			// to a tunnel endpoint destination.
   115  			externalIP := node.GetExternalIPv4()
   116  			if ip4 := newHostIP.To4(); ip4 != nil && !ip4.Equal(externalIP) {
   117  				copy(value.TunnelEndpoint[:], ip4)
   118  			}
   119  		}
   120  		err := l.bpfMap.Update(&key, &value)
   121  		if err != nil {
   122  			scopedLog.WithError(err).WithFields(logrus.Fields{
   123  				"key":                  key.String(),
   124  				"value":                value.String(),
   125  				logfields.IPAddr:       cidr,
   126  				logfields.Identity:     newID,
   127  				logfields.Modification: modType,
   128  			}).Warning("unable to update bpf map")
   129  		}
   130  	case ipcache.Delete:
   131  		err := l.bpfMap.Delete(&key)
   132  		if err != nil {
   133  			scopedLog.WithError(err).WithFields(logrus.Fields{
   134  				"key":                  key.String(),
   135  				logfields.IPAddr:       cidr,
   136  				logfields.Identity:     newID,
   137  				logfields.Modification: modType,
   138  			}).Warning("unable to delete from bpf map")
   139  		}
   140  	default:
   141  		scopedLog.Warning("cache modification type not supported")
   142  	}
   143  }
   144  
   145  // updateStaleEntriesFunction returns a DumpCallback that will update the
   146  // specified "keysToRemove" map with entries that exist in the BPF map which
   147  // do not exist in the in-memory ipcache.
   148  //
   149  // Must be called while holding ipcache.IPIdentityCache.Lock for reading.
   150  func updateStaleEntriesFunction(keysToRemove map[string]*ipcacheMap.Key) bpf.DumpCallback {
   151  	return func(key bpf.MapKey, _ bpf.MapValue) {
   152  		k := key.(*ipcacheMap.Key)
   153  		keyToIP := k.String()
   154  
   155  		// Don't RLock as part of the same goroutine.
   156  		if i, exists := ipcache.IPIdentityCache.LookupByPrefixRLocked(keyToIP); !exists {
   157  			switch i.Source {
   158  			case source.KVStore, source.Local:
   159  				// Cannot delete from map during callback because DumpWithCallback
   160  				// RLocks the map.
   161  				keysToRemove[keyToIP] = k.DeepCopy()
   162  			}
   163  		}
   164  	}
   165  }
   166  
   167  // handleMapShuffleFailure attempts to move the map with name 'backup' back to
   168  // 'realized', and logs a warning message if this can't be achieved.
   169  func handleMapShuffleFailure(src, dst string) {
   170  	backupPath := bpf.MapPath(src)
   171  	realizedPath := bpf.MapPath(dst)
   172  
   173  	if err := os.Rename(backupPath, realizedPath); err != nil {
   174  		log.WithError(err).WithFields(logrus.Fields{
   175  			logfields.BPFMapPath: realizedPath,
   176  		}).Warningf("Unable to recover during error renaming map paths")
   177  	}
   178  }
   179  
   180  // shuffleMaps attempts to move the map with name 'realized' to 'backup' and
   181  // 'pending' to 'realized'. If an error occurs, attempts to return the maps
   182  // back to their original paths.
   183  func shuffleMaps(realized, backup, pending string) error {
   184  	realizedPath := bpf.MapPath(realized)
   185  	backupPath := bpf.MapPath(backup)
   186  	pendingPath := bpf.MapPath(pending)
   187  
   188  	if err := os.Rename(realizedPath, backupPath); err != nil && !os.IsNotExist(err) {
   189  		return fmt.Errorf("Unable to back up existing ipcache: %s", err)
   190  	}
   191  
   192  	if err := os.Rename(pendingPath, realizedPath); err != nil {
   193  		handleMapShuffleFailure(backup, realized)
   194  		return fmt.Errorf("Unable to shift ipcache into new location: %s", err)
   195  	}
   196  
   197  	return nil
   198  }
   199  
   200  // garbageCollect implements GC of the ipcache map in one of two ways:
   201  //
   202  // On Linux 4.9, 4.10 or 4.16 and later:
   203  //   Periodically sweep through every element in the BPF map and check it
   204  //   against the in-memory copy of the map. If it doesn't exist in memory,
   205  //   delete the entry.
   206  // On Linux 4.11 to 4.15:
   207  //   Create a brand new map, populate it with all of the IPCache entries from
   208  //   the in-memory cache, delete the old map, and trigger regeneration of all
   209  //   BPF programs so that they pick up the new map.
   210  //
   211  // Returns an error if garbage collection failed to occur.
   212  func (l *BPFListener) garbageCollect(ctx context.Context) (*sync.WaitGroup, error) {
   213  	log.Debug("Running garbage collection for BPF IPCache")
   214  
   215  	if ipcacheMap.SupportsDelete() {
   216  		// Since controllers run asynchronously, need to make sure
   217  		// IPIdentityCache is not being updated concurrently while we
   218  		// do GC;
   219  		ipcache.IPIdentityCache.RLock()
   220  		defer ipcache.IPIdentityCache.RUnlock()
   221  
   222  		keysToRemove := map[string]*ipcacheMap.Key{}
   223  		if err := l.bpfMap.DumpWithCallback(updateStaleEntriesFunction(keysToRemove)); err != nil {
   224  			return nil, fmt.Errorf("error dumping ipcache BPF map: %s", err)
   225  		}
   226  
   227  		// Remove all keys which are not in in-memory cache from BPF map
   228  		// for consistency.
   229  		for _, k := range keysToRemove {
   230  			log.WithFields(logrus.Fields{logfields.BPFMapKey: k}).
   231  				Debug("deleting from ipcache BPF map")
   232  			if err := l.bpfMap.Delete(k); err != nil {
   233  				return nil, fmt.Errorf("error deleting key %s from ipcache BPF map: %s", k, err)
   234  			}
   235  		}
   236  	} else {
   237  		// Since controllers run asynchronously, need to make sure
   238  		// IPIdentityCache is not being updated concurrently while we
   239  		// do GC;
   240  		ipcache.IPIdentityCache.RLock()
   241  
   242  		// Populate the map at the new path
   243  		pendingMapName := fmt.Sprintf("%s_pending", ipcacheMap.Name)
   244  		pendingMap := ipcacheMap.NewMap(pendingMapName)
   245  		if _, err := pendingMap.OpenOrCreate(); err != nil {
   246  			ipcache.IPIdentityCache.RUnlock()
   247  			return nil, fmt.Errorf("Unable to create %s map: %s", pendingMapName, err)
   248  		}
   249  		pendingListener := newListener(pendingMap, l.datapath)
   250  		ipcache.IPIdentityCache.DumpToListenerLocked(pendingListener)
   251  		err := pendingMap.Close()
   252  		if err != nil {
   253  			log.WithError(err).WithField("map-name", pendingMapName).Warning("unable to close map")
   254  		}
   255  
   256  		// Move the maps around on the filesystem so that BPF reload
   257  		// will pick up the new paths without requiring recompilation.
   258  		backupMapName := fmt.Sprintf("%s_old", ipcacheMap.Name)
   259  		if err := shuffleMaps(ipcacheMap.Name, backupMapName, pendingMapName); err != nil {
   260  			ipcache.IPIdentityCache.RUnlock()
   261  			return nil, err
   262  		}
   263  
   264  		// Reopen the ipcache map so that new writes and reads will use
   265  		// the new map
   266  		if err := ipcacheMap.Reopen(); err != nil {
   267  			handleMapShuffleFailure(backupMapName, ipcacheMap.Name)
   268  			ipcache.IPIdentityCache.RUnlock()
   269  			return nil, err
   270  		}
   271  
   272  		// Unlock the ipcache as in order for
   273  		// TriggerReloadWithoutCompile() to succeed, other endpoint
   274  		// regenerations which are blocking on the ipcache lock may
   275  		// need to succeed first (#11946)
   276  		ipcache.IPIdentityCache.RUnlock()
   277  
   278  		wg, err := l.datapath.TriggerReloadWithoutCompile("datapath ipcache")
   279  		if err != nil {
   280  			// We can't really undo the map rename again as ipcache
   281  			// operations had already been permitted so the backup
   282  			// map is potentially outdated. Fail hard to restart
   283  			// the agent so we reconstruct the ipcache from
   284  			// scratch.
   285  			log.WithError(err).Fatal("Endpoint datapath reload triggered by ipcache GC failed. Inconsistent state.")
   286  		}
   287  
   288  		_ = os.RemoveAll(bpf.MapPath(backupMapName))
   289  		return wg, nil
   290  	}
   291  	return nil, nil
   292  }
   293  
   294  // OnIPIdentityCacheGC spawns a controller which synchronizes the BPF IPCache Map
   295  // with the in-memory IP-Identity cache.
   296  func (l *BPFListener) OnIPIdentityCacheGC() {
   297  	// This controller ensures that the in-memory IP-identity cache is in-sync
   298  	// with the BPF map on disk. These can get out of sync if the cilium-agent
   299  	// is offline for some time, as the maps persist on the BPF filesystem.
   300  	// In the case that there is some loss of event history in the key-value
   301  	// store (e.g., compaction in etcd), we cannot rely upon the key-value store
   302  	// fully to give us the history of all events. As such, periodically check
   303  	// for inconsistencies in the data-path with that in the agent to ensure
   304  	// consistent state.
   305  	controller.NewManager().UpdateController("ipcache-bpf-garbage-collection",
   306  		controller.ControllerParams{
   307  			DoFunc: func(ctx context.Context) error {
   308  				wg, err := l.garbageCollect(ctx)
   309  				if err != nil {
   310  					return err
   311  				}
   312  				if wg != nil {
   313  					wg.Wait()
   314  				}
   315  				return nil
   316  			},
   317  			RunInterval: 5 * time.Minute,
   318  		},
   319  	)
   320  }