gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/socket/netlink/route/protocol.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package route provides a NETLINK_ROUTE socket protocol. 16 package route 17 18 import ( 19 "bytes" 20 21 "gvisor.dev/gvisor/pkg/abi/linux" 22 "gvisor.dev/gvisor/pkg/context" 23 "gvisor.dev/gvisor/pkg/errors/linuxerr" 24 "gvisor.dev/gvisor/pkg/marshal/primitive" 25 "gvisor.dev/gvisor/pkg/sentry/inet" 26 "gvisor.dev/gvisor/pkg/sentry/kernel" 27 "gvisor.dev/gvisor/pkg/sentry/kernel/auth" 28 "gvisor.dev/gvisor/pkg/sentry/socket/netlink" 29 "gvisor.dev/gvisor/pkg/sentry/socket/netlink/nlmsg" 30 "gvisor.dev/gvisor/pkg/syserr" 31 ) 32 33 // commandKind describes the operational class of a message type. 34 // 35 // The route message types use the lower 2 bits of the type to describe class 36 // of command. 37 type commandKind int 38 39 const ( 40 kindNew commandKind = 0x0 41 kindDel commandKind = 0x1 42 kindGet commandKind = 0x2 43 kindSet commandKind = 0x3 44 ) 45 46 func typeKind(typ uint16) commandKind { 47 return commandKind(typ & 0x3) 48 } 49 50 // Protocol implements netlink.Protocol. 51 // 52 // +stateify savable 53 type Protocol struct{} 54 55 var _ netlink.Protocol = (*Protocol)(nil) 56 57 // NewProtocol creates a NETLINK_ROUTE netlink.Protocol. 58 func NewProtocol(t *kernel.Task) (netlink.Protocol, *syserr.Error) { 59 return &Protocol{}, nil 60 } 61 62 // Protocol implements netlink.Protocol.Protocol. 63 func (p *Protocol) Protocol() int { 64 return linux.NETLINK_ROUTE 65 } 66 67 // CanSend implements netlink.Protocol.CanSend. 68 func (p *Protocol) CanSend() bool { 69 return true 70 } 71 72 // dumpLinks handles RTM_GETLINK dump requests. 73 func (p *Protocol) dumpLinks(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error { 74 // NLM_F_DUMP + RTM_GETLINK messages are supposed to include an 75 // ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some 76 // userspace applications (including glibc) still include rtgenmsg. 77 // Linux has a workaround based on the total message length. 78 // 79 // We don't bother to check for either, since we don't support any 80 // extra attributes that may be included anyways. 81 // 82 // The message may also contain netlink attribute IFLA_EXT_MASK, which 83 // we don't support. 84 85 // The RTM_GETLINK dump response is a set of messages each containing 86 // an InterfaceInfoMessage followed by a set of netlink attributes. 87 88 // We always send back an NLMSG_DONE. 89 ms.Multi = true 90 91 stack := inet.StackFromContext(ctx) 92 if stack == nil { 93 // No network devices. 94 return nil 95 } 96 97 for idx, i := range stack.Interfaces() { 98 addNewLinkMessage(ms, idx, i) 99 } 100 101 return nil 102 } 103 104 // getLinks handles RTM_GETLINK requests. 105 func (p *Protocol) getLink(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error { 106 stack := inet.StackFromContext(ctx) 107 if stack == nil { 108 // No network devices. 109 return nil 110 } 111 112 // Parse message. 113 var ifi linux.InterfaceInfoMessage 114 attrs, ok := msg.GetData(&ifi) 115 if !ok { 116 return syserr.ErrInvalidArgument 117 } 118 119 // Parse attributes. 120 var byName []byte 121 for !attrs.Empty() { 122 ahdr, value, rest, ok := attrs.ParseFirst() 123 if !ok { 124 return syserr.ErrInvalidArgument 125 } 126 attrs = rest 127 128 switch ahdr.Type { 129 case linux.IFLA_IFNAME: 130 if len(value) < 1 { 131 return syserr.ErrInvalidArgument 132 } 133 byName = value[:len(value)-1] 134 135 // TODO(gvisor.dev/issue/578): Support IFLA_EXT_MASK. 136 } 137 } 138 139 found := false 140 for idx, i := range stack.Interfaces() { 141 switch { 142 case ifi.Index > 0: 143 if idx != ifi.Index { 144 continue 145 } 146 case byName != nil: 147 if string(byName) != i.Name { 148 continue 149 } 150 default: 151 // Criteria not specified. 152 return syserr.ErrInvalidArgument 153 } 154 155 addNewLinkMessage(ms, idx, i) 156 found = true 157 break 158 } 159 if !found { 160 return syserr.ErrNoDevice 161 } 162 return nil 163 } 164 165 func (p *Protocol) newLink(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error { 166 stack := inet.StackFromContext(ctx) 167 if stack == nil { 168 // No network stack. 169 return syserr.ErrProtocolNotSupported 170 } 171 172 return stack.SetInterface(ctx, msg) 173 } 174 175 // delLink handles RTM_DELLINK requests. 176 func (p *Protocol) delLink(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error { 177 stack := inet.StackFromContext(ctx) 178 if stack == nil { 179 // No network stack. 180 return syserr.ErrProtocolNotSupported 181 } 182 183 var ifinfomsg linux.InterfaceInfoMessage 184 attrs, ok := msg.GetData(&ifinfomsg) 185 if !ok { 186 return syserr.ErrInvalidArgument 187 } 188 if ifinfomsg.Index == 0 { 189 // The index is unspecified, search by the interface name. 190 ahdr, value, _, ok := attrs.ParseFirst() 191 if !ok { 192 return syserr.ErrInvalidArgument 193 } 194 switch ahdr.Type { 195 case linux.IFLA_IFNAME: 196 if len(value) < 1 { 197 return syserr.ErrInvalidArgument 198 } 199 ifname := string(value[:len(value)-1]) 200 for idx, ifa := range stack.Interfaces() { 201 if ifname == ifa.Name { 202 ifinfomsg.Index = idx 203 break 204 } 205 } 206 default: 207 return syserr.ErrInvalidArgument 208 } 209 if ifinfomsg.Index == 0 { 210 return syserr.ErrNoDevice 211 } 212 } 213 return syserr.FromError(stack.RemoveInterface(ifinfomsg.Index)) 214 } 215 216 // addNewLinkMessage appends RTM_NEWLINK message for the given interface into 217 // the message set. 218 func addNewLinkMessage(ms *nlmsg.MessageSet, idx int32, i inet.Interface) { 219 m := ms.AddMessage(linux.NetlinkMessageHeader{ 220 Type: linux.RTM_NEWLINK, 221 }) 222 223 m.Put(&linux.InterfaceInfoMessage{ 224 Family: linux.AF_UNSPEC, 225 Type: i.DeviceType, 226 Index: idx, 227 Flags: i.Flags, 228 }) 229 230 m.PutAttrString(linux.IFLA_IFNAME, i.Name) 231 m.PutAttr(linux.IFLA_MTU, primitive.AllocateUint32(i.MTU)) 232 233 mac := make([]byte, 6) 234 brd := mac 235 if len(i.Addr) > 0 { 236 mac = i.Addr 237 brd = bytes.Repeat([]byte{0xff}, len(i.Addr)) 238 } 239 m.PutAttr(linux.IFLA_ADDRESS, primitive.AsByteSlice(mac)) 240 m.PutAttr(linux.IFLA_BROADCAST, primitive.AsByteSlice(brd)) 241 242 // TODO(gvisor.dev/issue/578): There are many more attributes. 243 } 244 245 // dumpAddrs handles RTM_GETADDR dump requests. 246 func (p *Protocol) dumpAddrs(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error { 247 // RTM_GETADDR dump requests need not contain anything more than the 248 // netlink header and 1 byte protocol family common to all 249 // NETLINK_ROUTE requests. 250 // 251 // TODO(b/68878065): Filter output by passed protocol family. 252 253 // The RTM_GETADDR dump response is a set of RTM_NEWADDR messages each 254 // containing an InterfaceAddrMessage followed by a set of netlink 255 // attributes. 256 257 // We always send back an NLMSG_DONE. 258 ms.Multi = true 259 260 stack := inet.StackFromContext(ctx) 261 if stack == nil { 262 // No network devices. 263 return nil 264 } 265 266 for id, as := range stack.InterfaceAddrs() { 267 for _, a := range as { 268 m := ms.AddMessage(linux.NetlinkMessageHeader{ 269 Type: linux.RTM_NEWADDR, 270 }) 271 272 m.Put(&linux.InterfaceAddrMessage{ 273 Family: a.Family, 274 PrefixLen: a.PrefixLen, 275 Index: uint32(id), 276 }) 277 278 addr := primitive.ByteSlice([]byte(a.Addr)) 279 m.PutAttr(linux.IFA_LOCAL, &addr) 280 m.PutAttr(linux.IFA_ADDRESS, &addr) 281 282 // TODO(gvisor.dev/issue/578): There are many more attributes. 283 } 284 } 285 286 return nil 287 } 288 289 // commonPrefixLen reports the length of the longest IP address prefix. 290 // This is a simplified version from Golang's src/net/addrselect.go. 291 func commonPrefixLen(a, b []byte) (cpl int) { 292 for len(a) > 0 { 293 if a[0] == b[0] { 294 cpl += 8 295 a = a[1:] 296 b = b[1:] 297 continue 298 } 299 bits := 8 300 ab, bb := a[0], b[0] 301 for { 302 ab >>= 1 303 bb >>= 1 304 bits-- 305 if ab == bb { 306 cpl += bits 307 return 308 } 309 } 310 } 311 return 312 } 313 314 // fillRoute returns the Route using LPM algorithm. Refer to Linux's 315 // net/ipv4/route.c:rt_fill_info(). 316 func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) { 317 family := uint8(linux.AF_INET) 318 if len(addr) != 4 { 319 family = linux.AF_INET6 320 } 321 322 idx := -1 // Index of the Route rule to be returned. 323 idxDef := -1 // Index of the default route rule. 324 prefix := 0 // Current longest prefix. 325 for i, route := range routes { 326 if route.Family != family { 327 continue 328 } 329 330 if len(route.GatewayAddr) > 0 && route.DstLen == 0 { 331 idxDef = i 332 continue 333 } 334 335 cpl := commonPrefixLen(addr, route.DstAddr) 336 if cpl < int(route.DstLen) { 337 continue 338 } 339 cpl = int(route.DstLen) 340 if cpl > prefix { 341 idx = i 342 prefix = cpl 343 } 344 } 345 if idx == -1 { 346 idx = idxDef 347 } 348 if idx == -1 { 349 return inet.Route{}, syserr.ErrHostUnreachable 350 } 351 352 route := routes[idx] 353 if family == linux.AF_INET { 354 route.DstLen = 32 355 } else { 356 route.DstLen = 128 357 } 358 route.DstAddr = addr 359 route.Flags |= linux.RTM_F_CLONED // This route is cloned. 360 return route, nil 361 } 362 363 // parseForDestination parses a message as format of RouteMessage-RtAttr-dst. 364 func parseForDestination(msg *nlmsg.Message) ([]byte, *syserr.Error) { 365 var rtMsg linux.RouteMessage 366 attrs, ok := msg.GetData(&rtMsg) 367 if !ok { 368 return nil, syserr.ErrInvalidArgument 369 } 370 // iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See 371 // commit bc234301af12. Note we don't check this flag for backward 372 // compatibility. 373 if rtMsg.Flags != 0 && rtMsg.Flags != linux.RTM_F_LOOKUP_TABLE { 374 return nil, syserr.ErrNotSupported 375 } 376 377 // Expect first attribute is RTA_DST. 378 if hdr, value, _, ok := attrs.ParseFirst(); ok && hdr.Type == linux.RTA_DST { 379 return value, nil 380 } 381 return nil, syserr.ErrInvalidArgument 382 } 383 384 // dumpRoutes handles RTM_GETROUTE requests. 385 func (p *Protocol) dumpRoutes(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error { 386 // RTM_GETROUTE dump requests need not contain anything more than the 387 // netlink header and 1 byte protocol family common to all 388 // NETLINK_ROUTE requests. 389 390 stack := inet.StackFromContext(ctx) 391 if stack == nil { 392 // No network routes. 393 return nil 394 } 395 396 hdr := msg.Header() 397 routeTables := stack.RouteTable() 398 399 if hdr.Flags == linux.NLM_F_REQUEST { 400 dst, err := parseForDestination(msg) 401 if err != nil { 402 return err 403 } 404 route, err := fillRoute(routeTables, dst) 405 if err != nil { 406 // TODO(gvisor.dev/issue/1237): return NLMSG_ERROR with ENETUNREACH. 407 return syserr.ErrNotSupported 408 } 409 routeTables = append([]inet.Route{}, route) 410 } else if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP { 411 // We always send back an NLMSG_DONE. 412 ms.Multi = true 413 } else { 414 // TODO(b/68878065): Only above cases are supported. 415 return syserr.ErrNotSupported 416 } 417 418 for _, rt := range routeTables { 419 m := ms.AddMessage(linux.NetlinkMessageHeader{ 420 Type: linux.RTM_NEWROUTE, 421 }) 422 423 m.Put(&linux.RouteMessage{ 424 Family: rt.Family, 425 DstLen: rt.DstLen, 426 SrcLen: rt.SrcLen, 427 TOS: rt.TOS, 428 429 // Always return the main table since we don't have multiple 430 // routing tables. 431 Table: linux.RT_TABLE_MAIN, 432 Protocol: rt.Protocol, 433 Scope: rt.Scope, 434 Type: rt.Type, 435 436 Flags: rt.Flags, 437 }) 438 439 m.PutAttr(254, primitive.AsByteSlice([]byte{123})) 440 if rt.DstLen > 0 { 441 m.PutAttr(linux.RTA_DST, primitive.AsByteSlice(rt.DstAddr)) 442 } 443 if rt.SrcLen > 0 { 444 m.PutAttr(linux.RTA_SRC, primitive.AsByteSlice(rt.SrcAddr)) 445 } 446 if rt.OutputInterface != 0 { 447 m.PutAttr(linux.RTA_OIF, primitive.AllocateInt32(rt.OutputInterface)) 448 } 449 if len(rt.GatewayAddr) > 0 { 450 m.PutAttr(linux.RTA_GATEWAY, primitive.AsByteSlice(rt.GatewayAddr)) 451 } 452 453 // TODO(gvisor.dev/issue/578): There are many more attributes. 454 } 455 456 return nil 457 } 458 459 // newAddr handles RTM_NEWADDR requests. 460 func (p *Protocol) newAddr(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error { 461 stack := inet.StackFromContext(ctx) 462 if stack == nil { 463 // No network stack. 464 return syserr.ErrProtocolNotSupported 465 } 466 467 var ifa linux.InterfaceAddrMessage 468 attrs, ok := msg.GetData(&ifa) 469 if !ok { 470 return syserr.ErrInvalidArgument 471 } 472 473 for !attrs.Empty() { 474 ahdr, value, rest, ok := attrs.ParseFirst() 475 if !ok { 476 return syserr.ErrInvalidArgument 477 } 478 attrs = rest 479 480 // NOTE: A netlink message will contain multiple header attributes. 481 // Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent 482 // with IFA_ADDRESS being a prefix address and IFA_LOCAL being the 483 // local interface address. We add the local interface address here 484 // and ignore the IFA_ADDRESS. 485 switch ahdr.Type { 486 case linux.IFA_LOCAL: 487 err := stack.AddInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ 488 Family: ifa.Family, 489 PrefixLen: ifa.PrefixLen, 490 Flags: ifa.Flags, 491 Addr: value, 492 }) 493 if linuxerr.Equals(linuxerr.EEXIST, err) { 494 flags := msg.Header().Flags 495 if flags&linux.NLM_F_EXCL != 0 { 496 return syserr.ErrExists 497 } 498 } else if err != nil { 499 return syserr.ErrInvalidArgument 500 } 501 case linux.IFA_ADDRESS: 502 default: 503 return syserr.ErrNotSupported 504 } 505 } 506 return nil 507 } 508 509 // delAddr handles RTM_DELADDR requests. 510 func (p *Protocol) delAddr(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error { 511 stack := inet.StackFromContext(ctx) 512 if stack == nil { 513 // No network stack. 514 return syserr.ErrProtocolNotSupported 515 } 516 517 var ifa linux.InterfaceAddrMessage 518 attrs, ok := msg.GetData(&ifa) 519 if !ok { 520 return syserr.ErrInvalidArgument 521 } 522 523 for !attrs.Empty() { 524 ahdr, value, rest, ok := attrs.ParseFirst() 525 if !ok { 526 return syserr.ErrInvalidArgument 527 } 528 attrs = rest 529 530 // NOTE: A netlink message will contain multiple header attributes. 531 // Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent 532 // with IFA_ADDRESS being a prefix address and IFA_LOCAL being the 533 // local interface address. We use the local interface address to 534 // remove the address and ignore the IFA_ADDRESS. 535 switch ahdr.Type { 536 case linux.IFA_LOCAL: 537 err := stack.RemoveInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ 538 Family: ifa.Family, 539 PrefixLen: ifa.PrefixLen, 540 Flags: ifa.Flags, 541 Addr: value, 542 }) 543 if err != nil { 544 return syserr.ErrBadLocalAddress 545 } 546 case linux.IFA_ADDRESS: 547 default: 548 return syserr.ErrNotSupported 549 } 550 } 551 552 return nil 553 } 554 555 // ProcessMessage implements netlink.Protocol.ProcessMessage. 556 func (p *Protocol) ProcessMessage(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error { 557 hdr := msg.Header() 558 559 // All messages start with a 1 byte protocol family. 560 var family primitive.Uint8 561 if _, ok := msg.GetData(&family); !ok { 562 // Linux ignores messages missing the protocol family. See 563 // net/core/rtnetlink.c:rtnetlink_rcv_msg. 564 return nil 565 } 566 567 // Non-GET message types require CAP_NET_ADMIN. 568 if typeKind(hdr.Type) != kindGet { 569 creds := auth.CredentialsFromContext(ctx) 570 if !creds.HasCapability(linux.CAP_NET_ADMIN) { 571 return syserr.ErrPermissionDenied 572 } 573 } 574 575 if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP { 576 // TODO(b/68878065): Only the dump variant of the types below are 577 // supported. 578 switch hdr.Type { 579 case linux.RTM_GETLINK: 580 return p.dumpLinks(ctx, msg, ms) 581 case linux.RTM_GETADDR: 582 return p.dumpAddrs(ctx, msg, ms) 583 case linux.RTM_GETROUTE: 584 return p.dumpRoutes(ctx, msg, ms) 585 default: 586 return syserr.ErrNotSupported 587 } 588 } else if hdr.Flags&linux.NLM_F_REQUEST == linux.NLM_F_REQUEST { 589 switch hdr.Type { 590 case linux.RTM_NEWLINK: 591 return p.newLink(ctx, msg, ms) 592 case linux.RTM_GETLINK: 593 return p.getLink(ctx, msg, ms) 594 case linux.RTM_DELLINK: 595 return p.delLink(ctx, msg, ms) 596 case linux.RTM_GETROUTE: 597 return p.dumpRoutes(ctx, msg, ms) 598 case linux.RTM_NEWADDR: 599 return p.newAddr(ctx, msg, ms) 600 case linux.RTM_DELADDR: 601 return p.delAddr(ctx, msg, ms) 602 default: 603 return syserr.ErrNotSupported 604 } 605 } 606 return syserr.ErrNotSupported 607 } 608 609 // init registers the NETLINK_ROUTE provider. 610 func init() { 611 netlink.RegisterProvider(linux.NETLINK_ROUTE, NewProtocol) 612 }