github.com/imran-kn/cilium-fork@v1.6.9/pkg/bpf/bpf_linux.go (about) 1 // Copyright 2016-2019 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build linux 16 17 package bpf 18 19 import ( 20 "fmt" 21 "math" 22 "os" 23 "path/filepath" 24 "runtime" 25 "syscall" 26 "unsafe" 27 28 "github.com/cilium/cilium/pkg/logging/logfields" 29 "github.com/cilium/cilium/pkg/metrics" 30 "github.com/cilium/cilium/pkg/option" 31 "github.com/cilium/cilium/pkg/spanstat" 32 33 "github.com/sirupsen/logrus" 34 "golang.org/x/sys/unix" 35 ) 36 37 // CreateMap creates a Map of type mapType, with key size keySize, a value size of 38 // valueSize and the maximum amount of entries of maxEntries. 39 // mapType should be one of the bpf_map_type in "uapi/linux/bpf.h" 40 // When mapType is the type HASH_OF_MAPS an innerID is required to point at a 41 // map fd which has the same type/keySize/valueSize/maxEntries as expected map 42 // entries. For all other mapTypes innerID is ignored and should be zeroed. 43 func CreateMap(mapType int, keySize, valueSize, maxEntries, flags, innerID uint32, path string) (int, error) { 44 // This struct must be in sync with union bpf_attr's anonymous struct 45 // used by the BPF_MAP_CREATE command 46 uba := struct { 47 mapType uint32 48 keySize uint32 49 valueSize uint32 50 maxEntries uint32 51 mapFlags uint32 52 innerID uint32 53 }{ 54 uint32(mapType), 55 keySize, 56 valueSize, 57 maxEntries, 58 flags, 59 innerID, 60 } 61 62 var duration *spanstat.SpanStat 63 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 64 duration = spanstat.Start() 65 } 66 ret, _, err := unix.Syscall( 67 unix.SYS_BPF, 68 BPF_MAP_CREATE, 69 uintptr(unsafe.Pointer(&uba)), 70 unsafe.Sizeof(uba), 71 ) 72 runtime.KeepAlive(&uba) 73 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 74 metrics.BPFSyscallDuration.WithLabelValues(metricOpCreate, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds()) 75 } 76 77 if err != 0 { 78 return 0, &os.PathError{ 79 Op: "Unable to create map", 80 Path: path, 81 Err: err, 82 } 83 } 84 85 return int(ret), nil 86 } 87 88 // This struct must be in sync with union bpf_attr's anonymous struct used by 89 // BPF_MAP_*_ELEM commands 90 type bpfAttrMapOpElem struct { 91 mapFd uint32 92 pad0 [4]byte 93 key uint64 94 value uint64 // union: value or next_key 95 flags uint64 96 } 97 98 // UpdateElementFromPointers updates the map in fd with the given value in the given key. 99 // The flags can have the following values: 100 // bpf.BPF_ANY to create new element or update existing; 101 // bpf.BPF_NOEXIST to create new element if it didn't exist; 102 // bpf.BPF_EXIST to update existing element. 103 func UpdateElementFromPointers(fd int, structPtr unsafe.Pointer, sizeOfStruct uintptr) error { 104 var duration *spanstat.SpanStat 105 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 106 duration = spanstat.Start() 107 } 108 ret, _, err := unix.Syscall( 109 unix.SYS_BPF, 110 BPF_MAP_UPDATE_ELEM, 111 uintptr(structPtr), 112 sizeOfStruct, 113 ) 114 runtime.KeepAlive(structPtr) 115 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 116 metrics.BPFSyscallDuration.WithLabelValues(metricOpUpdate, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds()) 117 } 118 119 if ret != 0 || err != 0 { 120 return fmt.Errorf("Unable to update element for map with file descriptor %d: %s", fd, err) 121 } 122 123 return nil 124 } 125 126 // UpdateElement updates the map in fd with the given value in the given key. 127 // The flags can have the following values: 128 // bpf.BPF_ANY to create new element or update existing; 129 // bpf.BPF_NOEXIST to create new element if it didn't exist; 130 // bpf.BPF_EXIST to update existing element. 131 // Deprecated, use UpdateElementFromPointers 132 func UpdateElement(fd int, key, value unsafe.Pointer, flags uint64) error { 133 uba := bpfAttrMapOpElem{ 134 mapFd: uint32(fd), 135 key: uint64(uintptr(key)), 136 value: uint64(uintptr(value)), 137 flags: uint64(flags), 138 } 139 140 ret := UpdateElementFromPointers(fd, unsafe.Pointer(&uba), unsafe.Sizeof(uba)) 141 runtime.KeepAlive(key) 142 runtime.KeepAlive(value) 143 return ret 144 } 145 146 // LookupElement looks up for the map value stored in fd with the given key. The value 147 // is stored in the value unsafe.Pointer. 148 func LookupElementFromPointers(fd int, structPtr unsafe.Pointer, sizeOfStruct uintptr) error { 149 var duration *spanstat.SpanStat 150 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 151 duration = spanstat.Start() 152 } 153 ret, _, err := unix.Syscall( 154 unix.SYS_BPF, 155 BPF_MAP_LOOKUP_ELEM, 156 uintptr(structPtr), 157 sizeOfStruct, 158 ) 159 runtime.KeepAlive(structPtr) 160 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 161 metrics.BPFSyscallDuration.WithLabelValues(metricOpLookup, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds()) 162 } 163 164 if ret != 0 || err != 0 { 165 return fmt.Errorf("Unable to lookup element in map with file descriptor %d: %s", fd, err) 166 } 167 168 return nil 169 } 170 171 // LookupElement looks up for the map value stored in fd with the given key. The value 172 // is stored in the value unsafe.Pointer. 173 // Deprecated, use LookupElementFromPointers 174 func LookupElement(fd int, key, value unsafe.Pointer) error { 175 uba := bpfAttrMapOpElem{ 176 mapFd: uint32(fd), 177 key: uint64(uintptr(key)), 178 value: uint64(uintptr(value)), 179 } 180 181 ret := LookupElementFromPointers(fd, unsafe.Pointer(&uba), unsafe.Sizeof(uba)) 182 runtime.KeepAlive(key) 183 runtime.KeepAlive(value) 184 return ret 185 } 186 187 func deleteElement(fd int, key unsafe.Pointer) (uintptr, syscall.Errno) { 188 uba := bpfAttrMapOpElem{ 189 mapFd: uint32(fd), 190 key: uint64(uintptr(key)), 191 } 192 var duration *spanstat.SpanStat 193 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 194 duration = spanstat.Start() 195 } 196 ret, _, err := unix.Syscall( 197 unix.SYS_BPF, 198 BPF_MAP_DELETE_ELEM, 199 uintptr(unsafe.Pointer(&uba)), 200 unsafe.Sizeof(uba), 201 ) 202 runtime.KeepAlive(key) 203 runtime.KeepAlive(&uba) 204 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 205 metrics.BPFSyscallDuration.WithLabelValues(metricOpDelete, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds()) 206 } 207 208 return ret, err 209 } 210 211 // DeleteElement deletes the map element with the given key. 212 func DeleteElement(fd int, key unsafe.Pointer) error { 213 ret, err := deleteElement(fd, key) 214 215 if ret != 0 || err != 0 { 216 return fmt.Errorf("Unable to delete element from map with file descriptor %d: %s", fd, err) 217 } 218 219 return nil 220 } 221 222 // GetNextKeyFromPointers stores, in nextKey, the next key after the key of the map in fd. 223 func GetNextKeyFromPointers(fd int, structPtr unsafe.Pointer, sizeOfStruct uintptr) error { 224 var duration *spanstat.SpanStat 225 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 226 duration = spanstat.Start() 227 } 228 ret, _, err := unix.Syscall( 229 unix.SYS_BPF, 230 BPF_MAP_GET_NEXT_KEY, 231 uintptr(structPtr), 232 sizeOfStruct, 233 ) 234 runtime.KeepAlive(structPtr) 235 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 236 metrics.BPFSyscallDuration.WithLabelValues(metricOpGetNextKey, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds()) 237 } 238 239 if ret != 0 || err != 0 { 240 return fmt.Errorf("Unable to get next key from map with file descriptor %d: %s", fd, err) 241 } 242 243 return nil 244 } 245 246 // GetNextKey stores, in nextKey, the next key after the key of the map in fd. 247 // Deprecated, use GetNextKeyFromPointers 248 func GetNextKey(fd int, key, nextKey unsafe.Pointer) error { 249 uba := bpfAttrMapOpElem{ 250 mapFd: uint32(fd), 251 key: uint64(uintptr(key)), 252 value: uint64(uintptr(nextKey)), 253 } 254 255 ret := GetNextKeyFromPointers(fd, unsafe.Pointer(&uba), unsafe.Sizeof(uba)) 256 runtime.KeepAlive(key) 257 runtime.KeepAlive(nextKey) 258 return ret 259 } 260 261 // GetFirstKey fetches the first key in the map. 262 func GetFirstKey(fd int, nextKey unsafe.Pointer) error { 263 uba := bpfAttrMapOpElem{ 264 mapFd: uint32(fd), 265 key: 0, // NULL -> Get first element 266 value: uint64(uintptr(nextKey)), 267 } 268 269 ret := GetNextKeyFromPointers(fd, unsafe.Pointer(&uba), unsafe.Sizeof(uba)) 270 runtime.KeepAlive(nextKey) 271 return ret 272 } 273 274 // This struct must be in sync with union bpf_attr's anonymous struct used by 275 // BPF_OBJ_*_ commands 276 type bpfAttrObjOp struct { 277 pathname uint64 278 fd uint32 279 pad0 [4]byte 280 } 281 282 // ObjPin stores the map's fd in pathname. 283 func ObjPin(fd int, pathname string) error { 284 pathStr := syscall.StringBytePtr(pathname) 285 uba := bpfAttrObjOp{ 286 pathname: uint64(uintptr(unsafe.Pointer(pathStr))), 287 fd: uint32(fd), 288 } 289 290 var duration *spanstat.SpanStat 291 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 292 duration = spanstat.Start() 293 } 294 ret, _, err := unix.Syscall( 295 unix.SYS_BPF, 296 BPF_OBJ_PIN, 297 uintptr(unsafe.Pointer(&uba)), 298 unsafe.Sizeof(uba), 299 ) 300 runtime.KeepAlive(pathStr) 301 runtime.KeepAlive(&uba) 302 303 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 304 metrics.BPFSyscallDuration.WithLabelValues(metricOpObjPin, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds()) 305 } 306 307 if ret != 0 || err != 0 { 308 return fmt.Errorf("Unable to pin object with file descriptor %d to %s: %s", fd, pathname, err) 309 } 310 311 return nil 312 } 313 314 // ObjGet reads the pathname and returns the map's fd read. 315 func ObjGet(pathname string) (int, error) { 316 pathStr := syscall.StringBytePtr(pathname) 317 uba := bpfAttrObjOp{ 318 pathname: uint64(uintptr(unsafe.Pointer(pathStr))), 319 } 320 321 var duration *spanstat.SpanStat 322 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 323 duration = spanstat.Start() 324 } 325 fd, _, err := unix.Syscall( 326 unix.SYS_BPF, 327 BPF_OBJ_GET, 328 uintptr(unsafe.Pointer(&uba)), 329 unsafe.Sizeof(uba), 330 ) 331 runtime.KeepAlive(pathStr) 332 runtime.KeepAlive(&uba) 333 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 334 metrics.BPFSyscallDuration.WithLabelValues(metricOpObjGet, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds()) 335 } 336 337 if fd == 0 || err != 0 { 338 return 0, &os.PathError{ 339 Op: "Unable to get object", 340 Err: err, 341 Path: pathname, 342 } 343 } 344 345 return int(fd), nil 346 } 347 348 type bpfAttrFdFromId struct { 349 ID uint32 350 NextID uint32 351 Flags uint32 352 } 353 354 // MapFdFromID retrieves a file descriptor based on a map ID. 355 func MapFdFromID(id int) (int, error) { 356 uba := bpfAttrFdFromId{ 357 ID: uint32(id), 358 } 359 360 var duration *spanstat.SpanStat 361 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 362 duration = spanstat.Start() 363 } 364 fd, _, err := unix.Syscall( 365 unix.SYS_BPF, 366 BPF_MAP_GET_FD_BY_ID, 367 uintptr(unsafe.Pointer(&uba)), 368 unsafe.Sizeof(uba), 369 ) 370 runtime.KeepAlive(&uba) 371 if option.Config.MetricsConfig.BPFSyscallDurationEnabled { 372 metrics.BPFSyscallDuration.WithLabelValues(metricOpGetFDByID, metrics.Errno2Outcome(err)).Observe(duration.End(err == 0).Total().Seconds()) 373 } 374 375 if fd == 0 || err != 0 { 376 return 0, fmt.Errorf("Unable to get object fd from id %d: %s", id, err) 377 } 378 379 return int(fd), nil 380 } 381 382 // ObjClose closes the map's fd. 383 func ObjClose(fd int) error { 384 if fd > 0 { 385 return unix.Close(fd) 386 } 387 return nil 388 } 389 390 func objCheck(fd int, path string, mapType int, keySize, valueSize, maxEntries, flags uint32) bool { 391 info, err := GetMapInfo(os.Getpid(), fd) 392 if err != nil { 393 return false 394 } 395 396 scopedLog := log.WithField(logfields.Path, path) 397 mismatch := false 398 399 if int(info.MapType) != mapType { 400 scopedLog.WithFields(logrus.Fields{ 401 "old": info.MapType, 402 "new": MapType(mapType), 403 }).Warning("Map type mismatch for BPF map") 404 mismatch = true 405 } 406 407 if info.KeySize != keySize { 408 scopedLog.WithFields(logrus.Fields{ 409 "old": info.KeySize, 410 "new": keySize, 411 }).Warning("Key-size mismatch for BPF map") 412 mismatch = true 413 } 414 415 if info.ValueSize != valueSize { 416 scopedLog.WithFields(logrus.Fields{ 417 "old": info.ValueSize, 418 "new": valueSize, 419 }).Warning("Value-size mismatch for BPF map") 420 mismatch = true 421 } 422 423 if info.MaxEntries != maxEntries { 424 scopedLog.WithFields(logrus.Fields{ 425 "old": info.MaxEntries, 426 "new": maxEntries, 427 }).Warning("Max entries mismatch for BPF map") 428 mismatch = true 429 } 430 if info.Flags != flags { 431 scopedLog.WithFields(logrus.Fields{ 432 "old": info.Flags, 433 "new": flags, 434 }).Warning("Flags mismatch for BPF map") 435 mismatch = true 436 } 437 438 if mismatch { 439 if info.MapType == MapTypeProgArray { 440 return false 441 } 442 443 scopedLog.Warning("Removing map to allow for property upgrade (expect map data loss)") 444 445 // Kernel still holds map reference count via attached prog. 446 // Only exception is prog array, but that is already resolved 447 // differently. 448 os.Remove(path) 449 return true 450 } 451 452 return false 453 } 454 455 func OpenOrCreateMap(path string, mapType int, keySize, valueSize, maxEntries, flags uint32, innerID uint32, pin bool) (int, bool, error) { 456 var fd int 457 458 redo := false 459 isNewMap := false 460 461 recreate: 462 if _, err := os.Stat(path); os.IsNotExist(err) || redo { 463 mapDir := filepath.Dir(path) 464 if _, err = os.Stat(mapDir); os.IsNotExist(err) { 465 if err = os.MkdirAll(mapDir, 0755); err != nil { 466 return 0, isNewMap, &os.PathError{ 467 Op: "Unable create map base directory", 468 Path: path, 469 Err: err, 470 } 471 } 472 } 473 474 fd, err = CreateMap( 475 mapType, 476 keySize, 477 valueSize, 478 maxEntries, 479 flags, 480 innerID, 481 path, 482 ) 483 484 defer func() { 485 if err != nil { 486 // In case of error, we need to close 487 // this fd since it was open by CreateMap 488 ObjClose(fd) 489 } 490 }() 491 492 isNewMap = true 493 494 if err != nil { 495 return 0, isNewMap, err 496 } 497 498 if pin { 499 err = ObjPin(fd, path) 500 if err != nil { 501 return 0, isNewMap, err 502 } 503 } 504 505 return fd, isNewMap, nil 506 } 507 508 fd, err := ObjGet(path) 509 if err == nil { 510 redo = objCheck( 511 fd, 512 path, 513 mapType, 514 keySize, 515 valueSize, 516 maxEntries, 517 flags, 518 ) 519 if redo == true { 520 ObjClose(fd) 521 goto recreate 522 } 523 } 524 525 return fd, isNewMap, err 526 } 527 528 // GetMtime returns monotonic time that can be used to compare 529 // values with ktime_get_ns() BPF helper, e.g. needed to check 530 // the timeout in sec for BPF entries. We return the raw nsec, 531 // although that is not quite usable for comparison. Go has 532 // runtime.nanotime() but doesn't expose it as API. 533 func GetMtime() (uint64, error) { 534 var ts unix.Timespec 535 536 err := unix.ClockGettime(unix.CLOCK_MONOTONIC, &ts) 537 if err != nil { 538 return 0, fmt.Errorf("Unable get time: %s", err) 539 } 540 541 return uint64(unix.TimespecToNsec(ts)), nil 542 } 543 544 type bpfAttrProg struct { 545 ProgType uint32 546 InsnCnt uint32 547 Insns uintptr 548 License uintptr 549 LogLevel uint32 550 LogSize uint32 551 LogBuf uintptr 552 KernVersion uint32 553 Flags uint32 554 Name [16]byte 555 Ifindex uint32 556 AttachType uint32 557 } 558 559 // TestDummyProg loads a minimal BPF program into the kernel and probes 560 // whether it succeeds in doing so. This can be used to bail out early 561 // in the daemon when a given type is not supported. 562 func TestDummyProg(progType ProgType, attachType uint32) error { 563 var oldLim unix.Rlimit 564 insns := []byte{ 565 // R0 = 1; EXIT 566 0xb7, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 567 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 568 } 569 license := []byte{'A', 'S', 'L', '2', '\x00'} 570 bpfAttr := bpfAttrProg{ 571 ProgType: uint32(progType), 572 AttachType: uint32(attachType), 573 InsnCnt: uint32(len(insns) / 8), 574 Insns: uintptr(unsafe.Pointer(&insns[0])), 575 License: uintptr(unsafe.Pointer(&license[0])), 576 } 577 tmpLim := unix.Rlimit{ 578 Cur: math.MaxUint64, 579 Max: math.MaxUint64, 580 } 581 err := unix.Getrlimit(unix.RLIMIT_MEMLOCK, &oldLim) 582 if err != nil { 583 return err 584 } 585 err = unix.Setrlimit(unix.RLIMIT_MEMLOCK, &tmpLim) 586 if err != nil { 587 return err 588 } 589 fd, _, errno := unix.Syscall(unix.SYS_BPF, BPF_PROG_LOAD, 590 uintptr(unsafe.Pointer(&bpfAttr)), 591 unsafe.Sizeof(bpfAttr)) 592 err = unix.Setrlimit(unix.RLIMIT_MEMLOCK, &oldLim) 593 if errno == 0 { 594 unix.Close(int(fd)) 595 if err != nil { 596 return err 597 } 598 return nil 599 } 600 601 runtime.KeepAlive(&insns) 602 runtime.KeepAlive(&license) 603 runtime.KeepAlive(&bpfAttr) 604 605 return errno 606 }