github.com/imran-kn/cilium-fork@v1.6.9/pkg/bpf/bpf_linux.go (about)

     1  // Copyright 2016-2019 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build linux
    16  
    17  package bpf
    18  
    19  import (
    20  	"fmt"
    21  	"math"
    22  	"os"
    23  	"path/filepath"
    24  	"runtime"
    25  	"syscall"
    26  	"unsafe"
    27  
    28  	"github.com/cilium/cilium/pkg/logging/logfields"
    29  	"github.com/cilium/cilium/pkg/metrics"
    30  	"github.com/cilium/cilium/pkg/option"
    31  	"github.com/cilium/cilium/pkg/spanstat"
    32  
    33  	"github.com/sirupsen/logrus"
    34  	"golang.org/x/sys/unix"
    35  )
    36  
    37  // CreateMap creates a Map of type mapType, with key size keySize, a value size of
    38  // valueSize and the maximum amount of entries of maxEntries.
    39  // mapType should be one of the bpf_map_type in "uapi/linux/bpf.h"
    40  // When mapType is the type HASH_OF_MAPS an innerID is required to point at a
    41  // map fd which has the same type/keySize/valueSize/maxEntries as expected map
    42  // entries. For all other mapTypes innerID is ignored and should be zeroed.
    43  func CreateMap(mapType int, keySize, valueSize, maxEntries, flags, innerID uint32, path string) (int, error) {
    44  	// This struct must be in sync with union bpf_attr's anonymous struct
    45  	// used by the BPF_MAP_CREATE command
    46  	uba := struct {
    47  		mapType    uint32
    48  		keySize    uint32
    49  		valueSize  uint32
    50  		maxEntries uint32
    51  		mapFlags   uint32
    52  		innerID    uint32
    53  	}{
    54  		uint32(mapType),
    55  		keySize,
    56  		valueSize,
    57  		maxEntries,
    58  		flags,
    59  		innerID,
    60  	}
    61  
    62  	var duration *spanstat.SpanStat
    63  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
    64  		duration = spanstat.Start()
    65  	}
    66  	ret, _, err := unix.Syscall(
    67  		unix.SYS_BPF,
    68  		BPF_MAP_CREATE,
    69  		uintptr(unsafe.Pointer(&uba)),
    70  		unsafe.Sizeof(uba),
    71  	)
    72  	runtime.KeepAlive(&uba)
    73  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
    74  		metrics.BPFSyscallDuration.WithLabelValues(metricOpCreate, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds())
    75  	}
    76  
    77  	if err != 0 {
    78  		return 0, &os.PathError{
    79  			Op:   "Unable to create map",
    80  			Path: path,
    81  			Err:  err,
    82  		}
    83  	}
    84  
    85  	return int(ret), nil
    86  }
    87  
// bpfAttrMapOpElem is the attribute block handed to the bpf() syscall for the
// per-element map commands issued in this file (update, lookup, delete and
// get-next-key).
//
// This struct must be in sync with union bpf_attr's anonymous struct used by
// BPF_MAP_*_ELEM commands
type bpfAttrMapOpElem struct {
	// mapFd is the file descriptor of the map to operate on.
	mapFd uint32
	// pad0 is explicit padding between the 32-bit fd and the 64-bit key.
	pad0  [4]byte
	// key holds the address of the key buffer; callers store 0 (NULL) to
	// request the first key from BPF_MAP_GET_NEXT_KEY.
	key   uint64
	// value holds the address of the value buffer, or of the next-key
	// buffer for BPF_MAP_GET_NEXT_KEY.
	value uint64 // union: value or next_key
	// flags carries the update flags for BPF_MAP_UPDATE_ELEM; the other
	// commands in this file leave it zero.
	flags uint64
}
    97  
    98  // UpdateElementFromPointers updates the map in fd with the given value in the given key.
    99  // The flags can have the following values:
   100  // bpf.BPF_ANY to create new element or update existing;
   101  // bpf.BPF_NOEXIST to create new element if it didn't exist;
   102  // bpf.BPF_EXIST to update existing element.
   103  func UpdateElementFromPointers(fd int, structPtr unsafe.Pointer, sizeOfStruct uintptr) error {
   104  	var duration *spanstat.SpanStat
   105  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   106  		duration = spanstat.Start()
   107  	}
   108  	ret, _, err := unix.Syscall(
   109  		unix.SYS_BPF,
   110  		BPF_MAP_UPDATE_ELEM,
   111  		uintptr(structPtr),
   112  		sizeOfStruct,
   113  	)
   114  	runtime.KeepAlive(structPtr)
   115  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   116  		metrics.BPFSyscallDuration.WithLabelValues(metricOpUpdate, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds())
   117  	}
   118  
   119  	if ret != 0 || err != 0 {
   120  		return fmt.Errorf("Unable to update element for map with file descriptor %d: %s", fd, err)
   121  	}
   122  
   123  	return nil
   124  }
   125  
   126  // UpdateElement updates the map in fd with the given value in the given key.
   127  // The flags can have the following values:
   128  // bpf.BPF_ANY to create new element or update existing;
   129  // bpf.BPF_NOEXIST to create new element if it didn't exist;
   130  // bpf.BPF_EXIST to update existing element.
   131  // Deprecated, use UpdateElementFromPointers
   132  func UpdateElement(fd int, key, value unsafe.Pointer, flags uint64) error {
   133  	uba := bpfAttrMapOpElem{
   134  		mapFd: uint32(fd),
   135  		key:   uint64(uintptr(key)),
   136  		value: uint64(uintptr(value)),
   137  		flags: uint64(flags),
   138  	}
   139  
   140  	ret := UpdateElementFromPointers(fd, unsafe.Pointer(&uba), unsafe.Sizeof(uba))
   141  	runtime.KeepAlive(key)
   142  	runtime.KeepAlive(value)
   143  	return ret
   144  }
   145  
   146  // LookupElement looks up for the map value stored in fd with the given key. The value
   147  // is stored in the value unsafe.Pointer.
   148  func LookupElementFromPointers(fd int, structPtr unsafe.Pointer, sizeOfStruct uintptr) error {
   149  	var duration *spanstat.SpanStat
   150  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   151  		duration = spanstat.Start()
   152  	}
   153  	ret, _, err := unix.Syscall(
   154  		unix.SYS_BPF,
   155  		BPF_MAP_LOOKUP_ELEM,
   156  		uintptr(structPtr),
   157  		sizeOfStruct,
   158  	)
   159  	runtime.KeepAlive(structPtr)
   160  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   161  		metrics.BPFSyscallDuration.WithLabelValues(metricOpLookup, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds())
   162  	}
   163  
   164  	if ret != 0 || err != 0 {
   165  		return fmt.Errorf("Unable to lookup element in map with file descriptor %d: %s", fd, err)
   166  	}
   167  
   168  	return nil
   169  }
   170  
   171  // LookupElement looks up for the map value stored in fd with the given key. The value
   172  // is stored in the value unsafe.Pointer.
   173  // Deprecated, use LookupElementFromPointers
   174  func LookupElement(fd int, key, value unsafe.Pointer) error {
   175  	uba := bpfAttrMapOpElem{
   176  		mapFd: uint32(fd),
   177  		key:   uint64(uintptr(key)),
   178  		value: uint64(uintptr(value)),
   179  	}
   180  
   181  	ret := LookupElementFromPointers(fd, unsafe.Pointer(&uba), unsafe.Sizeof(uba))
   182  	runtime.KeepAlive(key)
   183  	runtime.KeepAlive(value)
   184  	return ret
   185  }
   186  
   187  func deleteElement(fd int, key unsafe.Pointer) (uintptr, syscall.Errno) {
   188  	uba := bpfAttrMapOpElem{
   189  		mapFd: uint32(fd),
   190  		key:   uint64(uintptr(key)),
   191  	}
   192  	var duration *spanstat.SpanStat
   193  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   194  		duration = spanstat.Start()
   195  	}
   196  	ret, _, err := unix.Syscall(
   197  		unix.SYS_BPF,
   198  		BPF_MAP_DELETE_ELEM,
   199  		uintptr(unsafe.Pointer(&uba)),
   200  		unsafe.Sizeof(uba),
   201  	)
   202  	runtime.KeepAlive(key)
   203  	runtime.KeepAlive(&uba)
   204  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   205  		metrics.BPFSyscallDuration.WithLabelValues(metricOpDelete, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds())
   206  	}
   207  
   208  	return ret, err
   209  }
   210  
   211  // DeleteElement deletes the map element with the given key.
   212  func DeleteElement(fd int, key unsafe.Pointer) error {
   213  	ret, err := deleteElement(fd, key)
   214  
   215  	if ret != 0 || err != 0 {
   216  		return fmt.Errorf("Unable to delete element from map with file descriptor %d: %s", fd, err)
   217  	}
   218  
   219  	return nil
   220  }
   221  
   222  // GetNextKeyFromPointers stores, in nextKey, the next key after the key of the map in fd.
   223  func GetNextKeyFromPointers(fd int, structPtr unsafe.Pointer, sizeOfStruct uintptr) error {
   224  	var duration *spanstat.SpanStat
   225  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   226  		duration = spanstat.Start()
   227  	}
   228  	ret, _, err := unix.Syscall(
   229  		unix.SYS_BPF,
   230  		BPF_MAP_GET_NEXT_KEY,
   231  		uintptr(structPtr),
   232  		sizeOfStruct,
   233  	)
   234  	runtime.KeepAlive(structPtr)
   235  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   236  		metrics.BPFSyscallDuration.WithLabelValues(metricOpGetNextKey, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds())
   237  	}
   238  
   239  	if ret != 0 || err != 0 {
   240  		return fmt.Errorf("Unable to get next key from map with file descriptor %d: %s", fd, err)
   241  	}
   242  
   243  	return nil
   244  }
   245  
   246  // GetNextKey stores, in nextKey, the next key after the key of the map in fd.
   247  // Deprecated, use GetNextKeyFromPointers
   248  func GetNextKey(fd int, key, nextKey unsafe.Pointer) error {
   249  	uba := bpfAttrMapOpElem{
   250  		mapFd: uint32(fd),
   251  		key:   uint64(uintptr(key)),
   252  		value: uint64(uintptr(nextKey)),
   253  	}
   254  
   255  	ret := GetNextKeyFromPointers(fd, unsafe.Pointer(&uba), unsafe.Sizeof(uba))
   256  	runtime.KeepAlive(key)
   257  	runtime.KeepAlive(nextKey)
   258  	return ret
   259  }
   260  
   261  // GetFirstKey fetches the first key in the map.
   262  func GetFirstKey(fd int, nextKey unsafe.Pointer) error {
   263  	uba := bpfAttrMapOpElem{
   264  		mapFd: uint32(fd),
   265  		key:   0, // NULL -> Get first element
   266  		value: uint64(uintptr(nextKey)),
   267  	}
   268  
   269  	ret := GetNextKeyFromPointers(fd, unsafe.Pointer(&uba), unsafe.Sizeof(uba))
   270  	runtime.KeepAlive(nextKey)
   271  	return ret
   272  }
   273  
// bpfAttrObjOp is the attribute block handed to the bpf() syscall by ObjPin
// (BPF_OBJ_PIN) and ObjGet (BPF_OBJ_GET).
//
// This struct must be in sync with union bpf_attr's anonymous struct used by
// BPF_OBJ_*_ commands
type bpfAttrObjOp struct {
	// pathname holds the address of the NUL-terminated path string.
	pathname uint64
	// fd is the descriptor of the object to pin; ObjGet leaves it zero.
	fd       uint32
	// pad0 is explicit trailing padding after the 32-bit fd.
	pad0     [4]byte
}
   281  
   282  // ObjPin stores the map's fd in pathname.
   283  func ObjPin(fd int, pathname string) error {
   284  	pathStr := syscall.StringBytePtr(pathname)
   285  	uba := bpfAttrObjOp{
   286  		pathname: uint64(uintptr(unsafe.Pointer(pathStr))),
   287  		fd:       uint32(fd),
   288  	}
   289  
   290  	var duration *spanstat.SpanStat
   291  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   292  		duration = spanstat.Start()
   293  	}
   294  	ret, _, err := unix.Syscall(
   295  		unix.SYS_BPF,
   296  		BPF_OBJ_PIN,
   297  		uintptr(unsafe.Pointer(&uba)),
   298  		unsafe.Sizeof(uba),
   299  	)
   300  	runtime.KeepAlive(pathStr)
   301  	runtime.KeepAlive(&uba)
   302  
   303  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   304  		metrics.BPFSyscallDuration.WithLabelValues(metricOpObjPin, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds())
   305  	}
   306  
   307  	if ret != 0 || err != 0 {
   308  		return fmt.Errorf("Unable to pin object with file descriptor %d to %s: %s", fd, pathname, err)
   309  	}
   310  
   311  	return nil
   312  }
   313  
   314  // ObjGet reads the pathname and returns the map's fd read.
   315  func ObjGet(pathname string) (int, error) {
   316  	pathStr := syscall.StringBytePtr(pathname)
   317  	uba := bpfAttrObjOp{
   318  		pathname: uint64(uintptr(unsafe.Pointer(pathStr))),
   319  	}
   320  
   321  	var duration *spanstat.SpanStat
   322  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   323  		duration = spanstat.Start()
   324  	}
   325  	fd, _, err := unix.Syscall(
   326  		unix.SYS_BPF,
   327  		BPF_OBJ_GET,
   328  		uintptr(unsafe.Pointer(&uba)),
   329  		unsafe.Sizeof(uba),
   330  	)
   331  	runtime.KeepAlive(pathStr)
   332  	runtime.KeepAlive(&uba)
   333  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   334  		metrics.BPFSyscallDuration.WithLabelValues(metricOpObjGet, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds())
   335  	}
   336  
   337  	if fd == 0 || err != 0 {
   338  		return 0, &os.PathError{
   339  			Op:   "Unable to get object",
   340  			Err:  err,
   341  			Path: pathname,
   342  		}
   343  	}
   344  
   345  	return int(fd), nil
   346  }
   347  
// bpfAttrFdFromId is the attribute block that MapFdFromID hands to the bpf()
// syscall for the BPF_MAP_GET_FD_BY_ID command.
// NOTE(review): presumably this mirrors the *_GET_*_ID member of union
// bpf_attr and must stay in sync with it — confirm against uapi/linux/bpf.h.
type bpfAttrFdFromId struct {
	// ID is the identifier of the object to open.
	ID     uint32
	// NextID is left zero by MapFdFromID.
	NextID uint32
	// Flags is left zero by MapFdFromID.
	Flags  uint32
}
   353  
   354  // MapFdFromID retrieves a file descriptor based on a map ID.
   355  func MapFdFromID(id int) (int, error) {
   356  	uba := bpfAttrFdFromId{
   357  		ID: uint32(id),
   358  	}
   359  
   360  	var duration *spanstat.SpanStat
   361  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   362  		duration = spanstat.Start()
   363  	}
   364  	fd, _, err := unix.Syscall(
   365  		unix.SYS_BPF,
   366  		BPF_MAP_GET_FD_BY_ID,
   367  		uintptr(unsafe.Pointer(&uba)),
   368  		unsafe.Sizeof(uba),
   369  	)
   370  	runtime.KeepAlive(&uba)
   371  	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
   372  		metrics.BPFSyscallDuration.WithLabelValues(metricOpGetFDByID, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds())
   373  	}
   374  
   375  	if fd == 0 || err != 0 {
   376  		return 0, fmt.Errorf("Unable to get object fd from id %d: %s", id, err)
   377  	}
   378  
   379  	return int(fd), nil
   380  }
   381  
   382  // ObjClose closes the map's fd.
   383  func ObjClose(fd int) error {
   384  	if fd > 0 {
   385  		return unix.Close(fd)
   386  	}
   387  	return nil
   388  }
   389  
   390  func objCheck(fd int, path string, mapType int, keySize, valueSize, maxEntries, flags uint32) bool {
   391  	info, err := GetMapInfo(os.Getpid(), fd)
   392  	if err != nil {
   393  		return false
   394  	}
   395  
   396  	scopedLog := log.WithField(logfields.Path, path)
   397  	mismatch := false
   398  
   399  	if int(info.MapType) != mapType {
   400  		scopedLog.WithFields(logrus.Fields{
   401  			"old": info.MapType,
   402  			"new": MapType(mapType),
   403  		}).Warning("Map type mismatch for BPF map")
   404  		mismatch = true
   405  	}
   406  
   407  	if info.KeySize != keySize {
   408  		scopedLog.WithFields(logrus.Fields{
   409  			"old": info.KeySize,
   410  			"new": keySize,
   411  		}).Warning("Key-size mismatch for BPF map")
   412  		mismatch = true
   413  	}
   414  
   415  	if info.ValueSize != valueSize {
   416  		scopedLog.WithFields(logrus.Fields{
   417  			"old": info.ValueSize,
   418  			"new": valueSize,
   419  		}).Warning("Value-size mismatch for BPF map")
   420  		mismatch = true
   421  	}
   422  
   423  	if info.MaxEntries != maxEntries {
   424  		scopedLog.WithFields(logrus.Fields{
   425  			"old": info.MaxEntries,
   426  			"new": maxEntries,
   427  		}).Warning("Max entries mismatch for BPF map")
   428  		mismatch = true
   429  	}
   430  	if info.Flags != flags {
   431  		scopedLog.WithFields(logrus.Fields{
   432  			"old": info.Flags,
   433  			"new": flags,
   434  		}).Warning("Flags mismatch for BPF map")
   435  		mismatch = true
   436  	}
   437  
   438  	if mismatch {
   439  		if info.MapType == MapTypeProgArray {
   440  			return false
   441  		}
   442  
   443  		scopedLog.Warning("Removing map to allow for property upgrade (expect map data loss)")
   444  
   445  		// Kernel still holds map reference count via attached prog.
   446  		// Only exception is prog array, but that is already resolved
   447  		// differently.
   448  		os.Remove(path)
   449  		return true
   450  	}
   451  
   452  	return false
   453  }
   454  
   455  func OpenOrCreateMap(path string, mapType int, keySize, valueSize, maxEntries, flags uint32, innerID uint32, pin bool) (int, bool, error) {
   456  	var fd int
   457  
   458  	redo := false
   459  	isNewMap := false
   460  
   461  recreate:
   462  	if _, err := os.Stat(path); os.IsNotExist(err) || redo {
   463  		mapDir := filepath.Dir(path)
   464  		if _, err = os.Stat(mapDir); os.IsNotExist(err) {
   465  			if err = os.MkdirAll(mapDir, 0755); err != nil {
   466  				return 0, isNewMap, &os.PathError{
   467  					Op:   "Unable create map base directory",
   468  					Path: path,
   469  					Err:  err,
   470  				}
   471  			}
   472  		}
   473  
   474  		fd, err = CreateMap(
   475  			mapType,
   476  			keySize,
   477  			valueSize,
   478  			maxEntries,
   479  			flags,
   480  			innerID,
   481  			path,
   482  		)
   483  
   484  		defer func() {
   485  			if err != nil {
   486  				// In case of error, we need to close
   487  				// this fd since it was open by CreateMap
   488  				ObjClose(fd)
   489  			}
   490  		}()
   491  
   492  		isNewMap = true
   493  
   494  		if err != nil {
   495  			return 0, isNewMap, err
   496  		}
   497  
   498  		if pin {
   499  			err = ObjPin(fd, path)
   500  			if err != nil {
   501  				return 0, isNewMap, err
   502  			}
   503  		}
   504  
   505  		return fd, isNewMap, nil
   506  	}
   507  
   508  	fd, err := ObjGet(path)
   509  	if err == nil {
   510  		redo = objCheck(
   511  			fd,
   512  			path,
   513  			mapType,
   514  			keySize,
   515  			valueSize,
   516  			maxEntries,
   517  			flags,
   518  		)
   519  		if redo == true {
   520  			ObjClose(fd)
   521  			goto recreate
   522  		}
   523  	}
   524  
   525  	return fd, isNewMap, err
   526  }
   527  
   528  // GetMtime returns monotonic time that can be used to compare
   529  // values with ktime_get_ns() BPF helper, e.g. needed to check
   530  // the timeout in sec for BPF entries. We return the raw nsec,
   531  // although that is not quite usable for comparison. Go has
   532  // runtime.nanotime() but doesn't expose it as API.
   533  func GetMtime() (uint64, error) {
   534  	var ts unix.Timespec
   535  
   536  	err := unix.ClockGettime(unix.CLOCK_MONOTONIC, &ts)
   537  	if err != nil {
   538  		return 0, fmt.Errorf("Unable get time: %s", err)
   539  	}
   540  
   541  	return uint64(unix.TimespecToNsec(ts)), nil
   542  }
   543  
// bpfAttrProg is the attribute block that TestDummyProg hands to the bpf()
// syscall for the BPF_PROG_LOAD command.
// NOTE(review): presumably this mirrors the BPF_PROG_LOAD member of union
// bpf_attr and must stay in sync with it — confirm against uapi/linux/bpf.h.
type bpfAttrProg struct {
	// ProgType is the program type to load.
	ProgType    uint32
	// InsnCnt is the number of instructions; TestDummyProg derives it as
	// the instruction byte length divided by 8.
	InsnCnt     uint32
	// Insns is the address of the first instruction byte.
	Insns       uintptr
	// License is the address of the NUL-terminated license string.
	License     uintptr
	// The remaining fields except AttachType are left zero by TestDummyProg.
	LogLevel    uint32
	LogSize     uint32
	LogBuf      uintptr
	KernVersion uint32
	Flags       uint32
	Name        [16]byte
	Ifindex     uint32
	// AttachType is the attach type the program is loaded for.
	AttachType  uint32
}
   558  
   559  // TestDummyProg loads a minimal BPF program into the kernel and probes
   560  // whether it succeeds in doing so. This can be used to bail out early
   561  // in the daemon when a given type is not supported.
   562  func TestDummyProg(progType ProgType, attachType uint32) error {
   563  	var oldLim unix.Rlimit
   564  	insns := []byte{
   565  		// R0 = 1; EXIT
   566  		0xb7, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
   567  		0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   568  	}
   569  	license := []byte{'A', 'S', 'L', '2', '\x00'}
   570  	bpfAttr := bpfAttrProg{
   571  		ProgType:   uint32(progType),
   572  		AttachType: uint32(attachType),
   573  		InsnCnt:    uint32(len(insns) / 8),
   574  		Insns:      uintptr(unsafe.Pointer(&insns[0])),
   575  		License:    uintptr(unsafe.Pointer(&license[0])),
   576  	}
   577  	tmpLim := unix.Rlimit{
   578  		Cur: math.MaxUint64,
   579  		Max: math.MaxUint64,
   580  	}
   581  	err := unix.Getrlimit(unix.RLIMIT_MEMLOCK, &oldLim)
   582  	if err != nil {
   583  		return err
   584  	}
   585  	err = unix.Setrlimit(unix.RLIMIT_MEMLOCK, &tmpLim)
   586  	if err != nil {
   587  		return err
   588  	}
   589  	fd, _, errno := unix.Syscall(unix.SYS_BPF, BPF_PROG_LOAD,
   590  		uintptr(unsafe.Pointer(&bpfAttr)),
   591  		unsafe.Sizeof(bpfAttr))
   592  	err = unix.Setrlimit(unix.RLIMIT_MEMLOCK, &oldLim)
   593  	if errno == 0 {
   594  		unix.Close(int(fd))
   595  		if err != nil {
   596  			return err
   597  		}
   598  		return nil
   599  	}
   600  
   601  	runtime.KeepAlive(&insns)
   602  	runtime.KeepAlive(&license)
   603  	runtime.KeepAlive(&bpfAttr)
   604  
   605  	return errno
   606  }