github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/socket/netlink/route/protocol.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package route provides a NETLINK_ROUTE socket protocol. 16 package route 17 18 import ( 19 "bytes" 20 21 "github.com/metacubex/gvisor/pkg/abi/linux" 22 "github.com/metacubex/gvisor/pkg/context" 23 "github.com/metacubex/gvisor/pkg/errors/linuxerr" 24 "github.com/metacubex/gvisor/pkg/marshal/primitive" 25 "github.com/metacubex/gvisor/pkg/sentry/inet" 26 "github.com/metacubex/gvisor/pkg/sentry/kernel" 27 "github.com/metacubex/gvisor/pkg/sentry/kernel/auth" 28 "github.com/metacubex/gvisor/pkg/sentry/socket/netlink" 29 "github.com/metacubex/gvisor/pkg/syserr" 30 ) 31 32 // commandKind describes the operational class of a message type. 33 // 34 // The route message types use the lower 2 bits of the type to describe class 35 // of command. 36 type commandKind int 37 38 const ( 39 kindNew commandKind = 0x0 40 kindDel commandKind = 0x1 41 kindGet commandKind = 0x2 42 kindSet commandKind = 0x3 43 ) 44 45 func typeKind(typ uint16) commandKind { 46 return commandKind(typ & 0x3) 47 } 48 49 // Protocol implements netlink.Protocol. 50 // 51 // +stateify savable 52 type Protocol struct{} 53 54 var _ netlink.Protocol = (*Protocol)(nil) 55 56 // NewProtocol creates a NETLINK_ROUTE netlink.Protocol. 57 func NewProtocol(t *kernel.Task) (netlink.Protocol, *syserr.Error) { 58 return &Protocol{}, nil 59 } 60 61 // Protocol implements netlink.Protocol.Protocol. 62 func (p *Protocol) Protocol() int { 63 return linux.NETLINK_ROUTE 64 } 65 66 // CanSend implements netlink.Protocol.CanSend. 67 func (p *Protocol) CanSend() bool { 68 return true 69 } 70 71 // dumpLinks handles RTM_GETLINK dump requests. 72 func (p *Protocol) dumpLinks(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 73 // NLM_F_DUMP + RTM_GETLINK messages are supposed to include an 74 // ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some 75 // userspace applications (including glibc) still include rtgenmsg. 76 // Linux has a workaround based on the total message length. 77 // 78 // We don't bother to check for either, since we don't support any 79 // extra attributes that may be included anyways. 80 // 81 // The message may also contain netlink attribute IFLA_EXT_MASK, which 82 // we don't support. 83 84 // The RTM_GETLINK dump response is a set of messages each containing 85 // an InterfaceInfoMessage followed by a set of netlink attributes. 86 87 // We always send back an NLMSG_DONE. 88 ms.Multi = true 89 90 stack := inet.StackFromContext(ctx) 91 if stack == nil { 92 // No network devices. 93 return nil 94 } 95 96 for idx, i := range stack.Interfaces() { 97 addNewLinkMessage(ms, idx, i) 98 } 99 100 return nil 101 } 102 103 // getLinks handles RTM_GETLINK requests. 104 func (p *Protocol) getLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 105 stack := inet.StackFromContext(ctx) 106 if stack == nil { 107 // No network devices. 108 return nil 109 } 110 111 // Parse message. 112 var ifi linux.InterfaceInfoMessage 113 attrs, ok := msg.GetData(&ifi) 114 if !ok { 115 return syserr.ErrInvalidArgument 116 } 117 118 // Parse attributes. 119 var byName []byte 120 for !attrs.Empty() { 121 ahdr, value, rest, ok := attrs.ParseFirst() 122 if !ok { 123 return syserr.ErrInvalidArgument 124 } 125 attrs = rest 126 127 switch ahdr.Type { 128 case linux.IFLA_IFNAME: 129 if len(value) < 1 { 130 return syserr.ErrInvalidArgument 131 } 132 byName = value[:len(value)-1] 133 134 // TODO(gvisor.dev/issue/578): Support IFLA_EXT_MASK. 135 } 136 } 137 138 found := false 139 for idx, i := range stack.Interfaces() { 140 switch { 141 case ifi.Index > 0: 142 if idx != ifi.Index { 143 continue 144 } 145 case byName != nil: 146 if string(byName) != i.Name { 147 continue 148 } 149 default: 150 // Criteria not specified. 151 return syserr.ErrInvalidArgument 152 } 153 154 addNewLinkMessage(ms, idx, i) 155 found = true 156 break 157 } 158 if !found { 159 return syserr.ErrNoDevice 160 } 161 return nil 162 } 163 164 func (p *Protocol) newLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 165 stack := inet.StackFromContext(ctx) 166 if stack == nil { 167 // No network stack. 168 return syserr.ErrProtocolNotSupported 169 } 170 171 var ifinfomsg linux.InterfaceInfoMessage 172 attrs, ok := msg.GetData(&ifinfomsg) 173 if !ok { 174 return syserr.ErrInvalidArgument 175 } 176 for !attrs.Empty() { 177 // The index is unspecified, search by the interface name. 178 ahdr, value, rest, ok := attrs.ParseFirst() 179 if !ok { 180 return syserr.ErrInvalidArgument 181 } 182 attrs = rest 183 switch ahdr.Type { 184 case linux.IFLA_IFNAME: 185 if len(value) < 1 { 186 return syserr.ErrInvalidArgument 187 } 188 if ifinfomsg.Index != 0 { 189 // Device name changing isn't supported yet. 190 return syserr.ErrNotSupported 191 } 192 ifname := string(value[:len(value)-1]) 193 for idx, ifa := range stack.Interfaces() { 194 if ifname == ifa.Name { 195 ifinfomsg.Index = idx 196 break 197 } 198 } 199 default: 200 ctx.Warningf("unexpected attribute: %x", ahdr.Type) 201 return syserr.ErrNotSupported 202 } 203 } 204 if ifinfomsg.Index == 0 { 205 return syserr.ErrNoDevice 206 } 207 208 flags := msg.Header().Flags 209 if flags&linux.NLM_F_EXCL != 0 { 210 return syserr.ErrExists 211 } 212 if flags&linux.NLM_F_REPLACE != 0 { 213 return syserr.ErrExists 214 } 215 216 if ifinfomsg.Flags != 0 || ifinfomsg.Change != 0 { 217 if ifinfomsg.Change & ^uint32(linux.IFF_UP) != 0 { 218 ctx.Warningf("Unsupported ifi_change flags: %x", ifinfomsg.Change) 219 return syserr.ErrInvalidArgument 220 } 221 if ifinfomsg.Flags & ^uint32(linux.IFF_UP) != 0 { 222 ctx.Warningf("Unsupported ifi_flags: %x", ifinfomsg.Change) 223 return syserr.ErrInvalidArgument 224 } 225 // Netstack interfaces are always up. 226 } 227 return nil 228 } 229 230 // delLink handles RTM_DELLINK requests. 231 func (p *Protocol) delLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 232 stack := inet.StackFromContext(ctx) 233 if stack == nil { 234 // No network stack. 235 return syserr.ErrProtocolNotSupported 236 } 237 238 var ifinfomsg linux.InterfaceInfoMessage 239 attrs, ok := msg.GetData(&ifinfomsg) 240 if !ok { 241 return syserr.ErrInvalidArgument 242 } 243 if ifinfomsg.Index == 0 { 244 // The index is unspecified, search by the interface name. 245 ahdr, value, _, ok := attrs.ParseFirst() 246 if !ok { 247 return syserr.ErrInvalidArgument 248 } 249 switch ahdr.Type { 250 case linux.IFLA_IFNAME: 251 if len(value) < 1 { 252 return syserr.ErrInvalidArgument 253 } 254 ifname := string(value[:len(value)-1]) 255 for idx, ifa := range stack.Interfaces() { 256 if ifname == ifa.Name { 257 ifinfomsg.Index = idx 258 break 259 } 260 } 261 default: 262 return syserr.ErrInvalidArgument 263 } 264 if ifinfomsg.Index == 0 { 265 return syserr.ErrNoDevice 266 } 267 } 268 return syserr.FromError(stack.RemoveInterface(ifinfomsg.Index)) 269 } 270 271 // addNewLinkMessage appends RTM_NEWLINK message for the given interface into 272 // the message set. 273 func addNewLinkMessage(ms *netlink.MessageSet, idx int32, i inet.Interface) { 274 m := ms.AddMessage(linux.NetlinkMessageHeader{ 275 Type: linux.RTM_NEWLINK, 276 }) 277 278 m.Put(&linux.InterfaceInfoMessage{ 279 Family: linux.AF_UNSPEC, 280 Type: i.DeviceType, 281 Index: idx, 282 Flags: i.Flags, 283 }) 284 285 m.PutAttrString(linux.IFLA_IFNAME, i.Name) 286 m.PutAttr(linux.IFLA_MTU, primitive.AllocateUint32(i.MTU)) 287 288 mac := make([]byte, 6) 289 brd := mac 290 if len(i.Addr) > 0 { 291 mac = i.Addr 292 brd = bytes.Repeat([]byte{0xff}, len(i.Addr)) 293 } 294 m.PutAttr(linux.IFLA_ADDRESS, primitive.AsByteSlice(mac)) 295 m.PutAttr(linux.IFLA_BROADCAST, primitive.AsByteSlice(brd)) 296 297 // TODO(gvisor.dev/issue/578): There are many more attributes. 298 } 299 300 // dumpAddrs handles RTM_GETADDR dump requests. 301 func (p *Protocol) dumpAddrs(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 302 // RTM_GETADDR dump requests need not contain anything more than the 303 // netlink header and 1 byte protocol family common to all 304 // NETLINK_ROUTE requests. 305 // 306 // TODO(b/68878065): Filter output by passed protocol family. 307 308 // The RTM_GETADDR dump response is a set of RTM_NEWADDR messages each 309 // containing an InterfaceAddrMessage followed by a set of netlink 310 // attributes. 311 312 // We always send back an NLMSG_DONE. 313 ms.Multi = true 314 315 stack := inet.StackFromContext(ctx) 316 if stack == nil { 317 // No network devices. 318 return nil 319 } 320 321 for id, as := range stack.InterfaceAddrs() { 322 for _, a := range as { 323 m := ms.AddMessage(linux.NetlinkMessageHeader{ 324 Type: linux.RTM_NEWADDR, 325 }) 326 327 m.Put(&linux.InterfaceAddrMessage{ 328 Family: a.Family, 329 PrefixLen: a.PrefixLen, 330 Index: uint32(id), 331 }) 332 333 addr := primitive.ByteSlice([]byte(a.Addr)) 334 m.PutAttr(linux.IFA_LOCAL, &addr) 335 m.PutAttr(linux.IFA_ADDRESS, &addr) 336 337 // TODO(gvisor.dev/issue/578): There are many more attributes. 338 } 339 } 340 341 return nil 342 } 343 344 // commonPrefixLen reports the length of the longest IP address prefix. 345 // This is a simplied version from Golang's src/net/addrselect.go. 346 func commonPrefixLen(a, b []byte) (cpl int) { 347 for len(a) > 0 { 348 if a[0] == b[0] { 349 cpl += 8 350 a = a[1:] 351 b = b[1:] 352 continue 353 } 354 bits := 8 355 ab, bb := a[0], b[0] 356 for { 357 ab >>= 1 358 bb >>= 1 359 bits-- 360 if ab == bb { 361 cpl += bits 362 return 363 } 364 } 365 } 366 return 367 } 368 369 // fillRoute returns the Route using LPM algorithm. Refer to Linux's 370 // net/ipv4/route.c:rt_fill_info(). 371 func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) { 372 family := uint8(linux.AF_INET) 373 if len(addr) != 4 { 374 family = linux.AF_INET6 375 } 376 377 idx := -1 // Index of the Route rule to be returned. 378 idxDef := -1 // Index of the default route rule. 379 prefix := 0 // Current longest prefix. 380 for i, route := range routes { 381 if route.Family != family { 382 continue 383 } 384 385 if len(route.GatewayAddr) > 0 && route.DstLen == 0 { 386 idxDef = i 387 continue 388 } 389 390 cpl := commonPrefixLen(addr, route.DstAddr) 391 if cpl < int(route.DstLen) { 392 continue 393 } 394 cpl = int(route.DstLen) 395 if cpl > prefix { 396 idx = i 397 prefix = cpl 398 } 399 } 400 if idx == -1 { 401 idx = idxDef 402 } 403 if idx == -1 { 404 return inet.Route{}, syserr.ErrHostUnreachable 405 } 406 407 route := routes[idx] 408 if family == linux.AF_INET { 409 route.DstLen = 32 410 } else { 411 route.DstLen = 128 412 } 413 route.DstAddr = addr 414 route.Flags |= linux.RTM_F_CLONED // This route is cloned. 415 return route, nil 416 } 417 418 // parseForDestination parses a message as format of RouteMessage-RtAttr-dst. 419 func parseForDestination(msg *netlink.Message) ([]byte, *syserr.Error) { 420 var rtMsg linux.RouteMessage 421 attrs, ok := msg.GetData(&rtMsg) 422 if !ok { 423 return nil, syserr.ErrInvalidArgument 424 } 425 // iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See 426 // commit bc234301af12. Note we don't check this flag for backward 427 // compatibility. 428 if rtMsg.Flags != 0 && rtMsg.Flags != linux.RTM_F_LOOKUP_TABLE { 429 return nil, syserr.ErrNotSupported 430 } 431 432 // Expect first attribute is RTA_DST. 433 if hdr, value, _, ok := attrs.ParseFirst(); ok && hdr.Type == linux.RTA_DST { 434 return value, nil 435 } 436 return nil, syserr.ErrInvalidArgument 437 } 438 439 // dumpRoutes handles RTM_GETROUTE requests. 440 func (p *Protocol) dumpRoutes(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 441 // RTM_GETROUTE dump requests need not contain anything more than the 442 // netlink header and 1 byte protocol family common to all 443 // NETLINK_ROUTE requests. 444 445 stack := inet.StackFromContext(ctx) 446 if stack == nil { 447 // No network routes. 448 return nil 449 } 450 451 hdr := msg.Header() 452 routeTables := stack.RouteTable() 453 454 if hdr.Flags == linux.NLM_F_REQUEST { 455 dst, err := parseForDestination(msg) 456 if err != nil { 457 return err 458 } 459 route, err := fillRoute(routeTables, dst) 460 if err != nil { 461 // TODO(gvisor.dev/issue/1237): return NLMSG_ERROR with ENETUNREACH. 462 return syserr.ErrNotSupported 463 } 464 routeTables = append([]inet.Route{}, route) 465 } else if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP { 466 // We always send back an NLMSG_DONE. 467 ms.Multi = true 468 } else { 469 // TODO(b/68878065): Only above cases are supported. 470 return syserr.ErrNotSupported 471 } 472 473 for _, rt := range routeTables { 474 m := ms.AddMessage(linux.NetlinkMessageHeader{ 475 Type: linux.RTM_NEWROUTE, 476 }) 477 478 m.Put(&linux.RouteMessage{ 479 Family: rt.Family, 480 DstLen: rt.DstLen, 481 SrcLen: rt.SrcLen, 482 TOS: rt.TOS, 483 484 // Always return the main table since we don't have multiple 485 // routing tables. 486 Table: linux.RT_TABLE_MAIN, 487 Protocol: rt.Protocol, 488 Scope: rt.Scope, 489 Type: rt.Type, 490 491 Flags: rt.Flags, 492 }) 493 494 m.PutAttr(254, primitive.AsByteSlice([]byte{123})) 495 if rt.DstLen > 0 { 496 m.PutAttr(linux.RTA_DST, primitive.AsByteSlice(rt.DstAddr)) 497 } 498 if rt.SrcLen > 0 { 499 m.PutAttr(linux.RTA_SRC, primitive.AsByteSlice(rt.SrcAddr)) 500 } 501 if rt.OutputInterface != 0 { 502 m.PutAttr(linux.RTA_OIF, primitive.AllocateInt32(rt.OutputInterface)) 503 } 504 if len(rt.GatewayAddr) > 0 { 505 m.PutAttr(linux.RTA_GATEWAY, primitive.AsByteSlice(rt.GatewayAddr)) 506 } 507 508 // TODO(gvisor.dev/issue/578): There are many more attributes. 509 } 510 511 return nil 512 } 513 514 // newAddr handles RTM_NEWADDR requests. 515 func (p *Protocol) newAddr(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 516 stack := inet.StackFromContext(ctx) 517 if stack == nil { 518 // No network stack. 519 return syserr.ErrProtocolNotSupported 520 } 521 522 var ifa linux.InterfaceAddrMessage 523 attrs, ok := msg.GetData(&ifa) 524 if !ok { 525 return syserr.ErrInvalidArgument 526 } 527 528 for !attrs.Empty() { 529 ahdr, value, rest, ok := attrs.ParseFirst() 530 if !ok { 531 return syserr.ErrInvalidArgument 532 } 533 attrs = rest 534 535 // NOTE: A netlink message will contain multiple header attributes. 536 // Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent 537 // with IFA_ADDRESS being a prefix address and IFA_LOCAL being the 538 // local interface address. We add the local interface address here 539 // and ignore the IFA_ADDRESS. 540 switch ahdr.Type { 541 case linux.IFA_LOCAL: 542 err := stack.AddInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ 543 Family: ifa.Family, 544 PrefixLen: ifa.PrefixLen, 545 Flags: ifa.Flags, 546 Addr: value, 547 }) 548 if linuxerr.Equals(linuxerr.EEXIST, err) { 549 flags := msg.Header().Flags 550 if flags&linux.NLM_F_EXCL != 0 { 551 return syserr.ErrExists 552 } 553 } else if err != nil { 554 return syserr.ErrInvalidArgument 555 } 556 case linux.IFA_ADDRESS: 557 default: 558 return syserr.ErrNotSupported 559 } 560 } 561 return nil 562 } 563 564 // delAddr handles RTM_DELADDR requests. 565 func (p *Protocol) delAddr(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 566 stack := inet.StackFromContext(ctx) 567 if stack == nil { 568 // No network stack. 569 return syserr.ErrProtocolNotSupported 570 } 571 572 var ifa linux.InterfaceAddrMessage 573 attrs, ok := msg.GetData(&ifa) 574 if !ok { 575 return syserr.ErrInvalidArgument 576 } 577 578 for !attrs.Empty() { 579 ahdr, value, rest, ok := attrs.ParseFirst() 580 if !ok { 581 return syserr.ErrInvalidArgument 582 } 583 attrs = rest 584 585 // NOTE: A netlink message will contain multiple header attributes. 586 // Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent 587 // with IFA_ADDRESS being a prefix address and IFA_LOCAL being the 588 // local interface address. We use the local interface address to 589 // remove the address and ignore the IFA_ADDRESS. 590 switch ahdr.Type { 591 case linux.IFA_LOCAL: 592 err := stack.RemoveInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ 593 Family: ifa.Family, 594 PrefixLen: ifa.PrefixLen, 595 Flags: ifa.Flags, 596 Addr: value, 597 }) 598 if err != nil { 599 return syserr.ErrBadLocalAddress 600 } 601 case linux.IFA_ADDRESS: 602 default: 603 return syserr.ErrNotSupported 604 } 605 } 606 607 return nil 608 } 609 610 // ProcessMessage implements netlink.Protocol.ProcessMessage. 611 func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 612 hdr := msg.Header() 613 614 // All messages start with a 1 byte protocol family. 615 var family primitive.Uint8 616 if _, ok := msg.GetData(&family); !ok { 617 // Linux ignores messages missing the protocol family. See 618 // net/core/rtnetlink.c:rtnetlink_rcv_msg. 619 return nil 620 } 621 622 // Non-GET message types require CAP_NET_ADMIN. 623 if typeKind(hdr.Type) != kindGet { 624 creds := auth.CredentialsFromContext(ctx) 625 if !creds.HasCapability(linux.CAP_NET_ADMIN) { 626 return syserr.ErrPermissionDenied 627 } 628 } 629 630 if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP { 631 // TODO(b/68878065): Only the dump variant of the types below are 632 // supported. 633 switch hdr.Type { 634 case linux.RTM_GETLINK: 635 return p.dumpLinks(ctx, msg, ms) 636 case linux.RTM_GETADDR: 637 return p.dumpAddrs(ctx, msg, ms) 638 case linux.RTM_GETROUTE: 639 return p.dumpRoutes(ctx, msg, ms) 640 default: 641 return syserr.ErrNotSupported 642 } 643 } else if hdr.Flags&linux.NLM_F_REQUEST == linux.NLM_F_REQUEST { 644 switch hdr.Type { 645 case linux.RTM_NEWLINK: 646 return p.newLink(ctx, msg, ms) 647 case linux.RTM_GETLINK: 648 return p.getLink(ctx, msg, ms) 649 case linux.RTM_DELLINK: 650 return p.delLink(ctx, msg, ms) 651 case linux.RTM_GETROUTE: 652 return p.dumpRoutes(ctx, msg, ms) 653 case linux.RTM_NEWADDR: 654 return p.newAddr(ctx, msg, ms) 655 case linux.RTM_DELADDR: 656 return p.delAddr(ctx, msg, ms) 657 default: 658 return syserr.ErrNotSupported 659 } 660 } 661 return syserr.ErrNotSupported 662 } 663 664 // init registers the NETLINK_ROUTE provider. 665 func init() { 666 netlink.RegisterProvider(linux.NETLINK_ROUTE, NewProtocol) 667 }