github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/socket/netlink/route/protocol.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package route provides a NETLINK_ROUTE socket protocol. 16 package route 17 18 import ( 19 "bytes" 20 21 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 22 "github.com/MerlinKodo/gvisor/pkg/context" 23 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 24 "github.com/MerlinKodo/gvisor/pkg/marshal/primitive" 25 "github.com/MerlinKodo/gvisor/pkg/sentry/inet" 26 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel" 27 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth" 28 "github.com/MerlinKodo/gvisor/pkg/sentry/socket/netlink" 29 "github.com/MerlinKodo/gvisor/pkg/syserr" 30 ) 31 32 // commandKind describes the operational class of a message type. 33 // 34 // The route message types use the lower 2 bits of the type to describe class 35 // of command. 36 type commandKind int 37 38 const ( 39 kindNew commandKind = 0x0 40 kindDel commandKind = 0x1 41 kindGet commandKind = 0x2 42 kindSet commandKind = 0x3 43 ) 44 45 func typeKind(typ uint16) commandKind { 46 return commandKind(typ & 0x3) 47 } 48 49 // Protocol implements netlink.Protocol. 50 // 51 // +stateify savable 52 type Protocol struct{} 53 54 var _ netlink.Protocol = (*Protocol)(nil) 55 56 // NewProtocol creates a NETLINK_ROUTE netlink.Protocol. 57 func NewProtocol(t *kernel.Task) (netlink.Protocol, *syserr.Error) { 58 return &Protocol{}, nil 59 } 60 61 // Protocol implements netlink.Protocol.Protocol. 62 func (p *Protocol) Protocol() int { 63 return linux.NETLINK_ROUTE 64 } 65 66 // CanSend implements netlink.Protocol.CanSend. 67 func (p *Protocol) CanSend() bool { 68 return true 69 } 70 71 // dumpLinks handles RTM_GETLINK dump requests. 72 func (p *Protocol) dumpLinks(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 73 // NLM_F_DUMP + RTM_GETLINK messages are supposed to include an 74 // ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some 75 // userspace applications (including glibc) still include rtgenmsg. 76 // Linux has a workaround based on the total message length. 77 // 78 // We don't bother to check for either, since we don't support any 79 // extra attributes that may be included anyways. 80 // 81 // The message may also contain netlink attribute IFLA_EXT_MASK, which 82 // we don't support. 83 84 // The RTM_GETLINK dump response is a set of messages each containing 85 // an InterfaceInfoMessage followed by a set of netlink attributes. 86 87 // We always send back an NLMSG_DONE. 88 ms.Multi = true 89 90 stack := inet.StackFromContext(ctx) 91 if stack == nil { 92 // No network devices. 93 return nil 94 } 95 96 for idx, i := range stack.Interfaces() { 97 addNewLinkMessage(ms, idx, i) 98 } 99 100 return nil 101 } 102 103 // getLinks handles RTM_GETLINK requests. 104 func (p *Protocol) getLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 105 stack := inet.StackFromContext(ctx) 106 if stack == nil { 107 // No network devices. 108 return nil 109 } 110 111 // Parse message. 112 var ifi linux.InterfaceInfoMessage 113 attrs, ok := msg.GetData(&ifi) 114 if !ok { 115 return syserr.ErrInvalidArgument 116 } 117 118 // Parse attributes. 119 var byName []byte 120 for !attrs.Empty() { 121 ahdr, value, rest, ok := attrs.ParseFirst() 122 if !ok { 123 return syserr.ErrInvalidArgument 124 } 125 attrs = rest 126 127 switch ahdr.Type { 128 case linux.IFLA_IFNAME: 129 if len(value) < 1 { 130 return syserr.ErrInvalidArgument 131 } 132 byName = value[:len(value)-1] 133 134 // TODO(gvisor.dev/issue/578): Support IFLA_EXT_MASK. 135 } 136 } 137 138 found := false 139 for idx, i := range stack.Interfaces() { 140 switch { 141 case ifi.Index > 0: 142 if idx != ifi.Index { 143 continue 144 } 145 case byName != nil: 146 if string(byName) != i.Name { 147 continue 148 } 149 default: 150 // Criteria not specified. 151 return syserr.ErrInvalidArgument 152 } 153 154 addNewLinkMessage(ms, idx, i) 155 found = true 156 break 157 } 158 if !found { 159 return syserr.ErrNoDevice 160 } 161 return nil 162 } 163 164 // delLink handles RTM_DELLINK requests. 165 func (p *Protocol) delLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 166 stack := inet.StackFromContext(ctx) 167 if stack == nil { 168 // No network stack. 169 return syserr.ErrProtocolNotSupported 170 } 171 172 var ifinfomsg linux.InterfaceInfoMessage 173 attrs, ok := msg.GetData(&ifinfomsg) 174 if !ok { 175 return syserr.ErrInvalidArgument 176 } 177 if ifinfomsg.Index == 0 { 178 // The index is unspecified, search by the interface name. 179 ahdr, value, _, ok := attrs.ParseFirst() 180 if !ok { 181 return syserr.ErrInvalidArgument 182 } 183 switch ahdr.Type { 184 case linux.IFLA_IFNAME: 185 if len(value) < 1 { 186 return syserr.ErrInvalidArgument 187 } 188 ifname := string(value[:len(value)-1]) 189 for idx, ifa := range stack.Interfaces() { 190 if ifname == ifa.Name { 191 ifinfomsg.Index = idx 192 break 193 } 194 } 195 default: 196 return syserr.ErrInvalidArgument 197 } 198 if ifinfomsg.Index == 0 { 199 return syserr.ErrNoDevice 200 } 201 } 202 return syserr.FromError(stack.RemoveInterface(ifinfomsg.Index)) 203 } 204 205 // addNewLinkMessage appends RTM_NEWLINK message for the given interface into 206 // the message set. 207 func addNewLinkMessage(ms *netlink.MessageSet, idx int32, i inet.Interface) { 208 m := ms.AddMessage(linux.NetlinkMessageHeader{ 209 Type: linux.RTM_NEWLINK, 210 }) 211 212 m.Put(&linux.InterfaceInfoMessage{ 213 Family: linux.AF_UNSPEC, 214 Type: i.DeviceType, 215 Index: idx, 216 Flags: i.Flags, 217 }) 218 219 m.PutAttrString(linux.IFLA_IFNAME, i.Name) 220 m.PutAttr(linux.IFLA_MTU, primitive.AllocateUint32(i.MTU)) 221 222 mac := make([]byte, 6) 223 brd := mac 224 if len(i.Addr) > 0 { 225 mac = i.Addr 226 brd = bytes.Repeat([]byte{0xff}, len(i.Addr)) 227 } 228 m.PutAttr(linux.IFLA_ADDRESS, primitive.AsByteSlice(mac)) 229 m.PutAttr(linux.IFLA_BROADCAST, primitive.AsByteSlice(brd)) 230 231 // TODO(gvisor.dev/issue/578): There are many more attributes. 232 } 233 234 // dumpAddrs handles RTM_GETADDR dump requests. 235 func (p *Protocol) dumpAddrs(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 236 // RTM_GETADDR dump requests need not contain anything more than the 237 // netlink header and 1 byte protocol family common to all 238 // NETLINK_ROUTE requests. 239 // 240 // TODO(b/68878065): Filter output by passed protocol family. 241 242 // The RTM_GETADDR dump response is a set of RTM_NEWADDR messages each 243 // containing an InterfaceAddrMessage followed by a set of netlink 244 // attributes. 245 246 // We always send back an NLMSG_DONE. 247 ms.Multi = true 248 249 stack := inet.StackFromContext(ctx) 250 if stack == nil { 251 // No network devices. 252 return nil 253 } 254 255 for id, as := range stack.InterfaceAddrs() { 256 for _, a := range as { 257 m := ms.AddMessage(linux.NetlinkMessageHeader{ 258 Type: linux.RTM_NEWADDR, 259 }) 260 261 m.Put(&linux.InterfaceAddrMessage{ 262 Family: a.Family, 263 PrefixLen: a.PrefixLen, 264 Index: uint32(id), 265 }) 266 267 addr := primitive.ByteSlice([]byte(a.Addr)) 268 m.PutAttr(linux.IFA_LOCAL, &addr) 269 m.PutAttr(linux.IFA_ADDRESS, &addr) 270 271 // TODO(gvisor.dev/issue/578): There are many more attributes. 272 } 273 } 274 275 return nil 276 } 277 278 // commonPrefixLen reports the length of the longest IP address prefix. 279 // This is a simplied version from Golang's src/net/addrselect.go. 280 func commonPrefixLen(a, b []byte) (cpl int) { 281 for len(a) > 0 { 282 if a[0] == b[0] { 283 cpl += 8 284 a = a[1:] 285 b = b[1:] 286 continue 287 } 288 bits := 8 289 ab, bb := a[0], b[0] 290 for { 291 ab >>= 1 292 bb >>= 1 293 bits-- 294 if ab == bb { 295 cpl += bits 296 return 297 } 298 } 299 } 300 return 301 } 302 303 // fillRoute returns the Route using LPM algorithm. Refer to Linux's 304 // net/ipv4/route.c:rt_fill_info(). 305 func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) { 306 family := uint8(linux.AF_INET) 307 if len(addr) != 4 { 308 family = linux.AF_INET6 309 } 310 311 idx := -1 // Index of the Route rule to be returned. 312 idxDef := -1 // Index of the default route rule. 313 prefix := 0 // Current longest prefix. 314 for i, route := range routes { 315 if route.Family != family { 316 continue 317 } 318 319 if len(route.GatewayAddr) > 0 && route.DstLen == 0 { 320 idxDef = i 321 continue 322 } 323 324 cpl := commonPrefixLen(addr, route.DstAddr) 325 if cpl < int(route.DstLen) { 326 continue 327 } 328 cpl = int(route.DstLen) 329 if cpl > prefix { 330 idx = i 331 prefix = cpl 332 } 333 } 334 if idx == -1 { 335 idx = idxDef 336 } 337 if idx == -1 { 338 return inet.Route{}, syserr.ErrHostUnreachable 339 } 340 341 route := routes[idx] 342 if family == linux.AF_INET { 343 route.DstLen = 32 344 } else { 345 route.DstLen = 128 346 } 347 route.DstAddr = addr 348 route.Flags |= linux.RTM_F_CLONED // This route is cloned. 349 return route, nil 350 } 351 352 // parseForDestination parses a message as format of RouteMessage-RtAttr-dst. 353 func parseForDestination(msg *netlink.Message) ([]byte, *syserr.Error) { 354 var rtMsg linux.RouteMessage 355 attrs, ok := msg.GetData(&rtMsg) 356 if !ok { 357 return nil, syserr.ErrInvalidArgument 358 } 359 // iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See 360 // commit bc234301af12. Note we don't check this flag for backward 361 // compatibility. 362 if rtMsg.Flags != 0 && rtMsg.Flags != linux.RTM_F_LOOKUP_TABLE { 363 return nil, syserr.ErrNotSupported 364 } 365 366 // Expect first attribute is RTA_DST. 367 if hdr, value, _, ok := attrs.ParseFirst(); ok && hdr.Type == linux.RTA_DST { 368 return value, nil 369 } 370 return nil, syserr.ErrInvalidArgument 371 } 372 373 // dumpRoutes handles RTM_GETROUTE requests. 374 func (p *Protocol) dumpRoutes(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 375 // RTM_GETROUTE dump requests need not contain anything more than the 376 // netlink header and 1 byte protocol family common to all 377 // NETLINK_ROUTE requests. 378 379 stack := inet.StackFromContext(ctx) 380 if stack == nil { 381 // No network routes. 382 return nil 383 } 384 385 hdr := msg.Header() 386 routeTables := stack.RouteTable() 387 388 if hdr.Flags == linux.NLM_F_REQUEST { 389 dst, err := parseForDestination(msg) 390 if err != nil { 391 return err 392 } 393 route, err := fillRoute(routeTables, dst) 394 if err != nil { 395 // TODO(gvisor.dev/issue/1237): return NLMSG_ERROR with ENETUNREACH. 396 return syserr.ErrNotSupported 397 } 398 routeTables = append([]inet.Route{}, route) 399 } else if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP { 400 // We always send back an NLMSG_DONE. 401 ms.Multi = true 402 } else { 403 // TODO(b/68878065): Only above cases are supported. 404 return syserr.ErrNotSupported 405 } 406 407 for _, rt := range routeTables { 408 m := ms.AddMessage(linux.NetlinkMessageHeader{ 409 Type: linux.RTM_NEWROUTE, 410 }) 411 412 m.Put(&linux.RouteMessage{ 413 Family: rt.Family, 414 DstLen: rt.DstLen, 415 SrcLen: rt.SrcLen, 416 TOS: rt.TOS, 417 418 // Always return the main table since we don't have multiple 419 // routing tables. 420 Table: linux.RT_TABLE_MAIN, 421 Protocol: rt.Protocol, 422 Scope: rt.Scope, 423 Type: rt.Type, 424 425 Flags: rt.Flags, 426 }) 427 428 m.PutAttr(254, primitive.AsByteSlice([]byte{123})) 429 if rt.DstLen > 0 { 430 m.PutAttr(linux.RTA_DST, primitive.AsByteSlice(rt.DstAddr)) 431 } 432 if rt.SrcLen > 0 { 433 m.PutAttr(linux.RTA_SRC, primitive.AsByteSlice(rt.SrcAddr)) 434 } 435 if rt.OutputInterface != 0 { 436 m.PutAttr(linux.RTA_OIF, primitive.AllocateInt32(rt.OutputInterface)) 437 } 438 if len(rt.GatewayAddr) > 0 { 439 m.PutAttr(linux.RTA_GATEWAY, primitive.AsByteSlice(rt.GatewayAddr)) 440 } 441 442 // TODO(gvisor.dev/issue/578): There are many more attributes. 443 } 444 445 return nil 446 } 447 448 // newAddr handles RTM_NEWADDR requests. 449 func (p *Protocol) newAddr(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 450 stack := inet.StackFromContext(ctx) 451 if stack == nil { 452 // No network stack. 453 return syserr.ErrProtocolNotSupported 454 } 455 456 var ifa linux.InterfaceAddrMessage 457 attrs, ok := msg.GetData(&ifa) 458 if !ok { 459 return syserr.ErrInvalidArgument 460 } 461 462 for !attrs.Empty() { 463 ahdr, value, rest, ok := attrs.ParseFirst() 464 if !ok { 465 return syserr.ErrInvalidArgument 466 } 467 attrs = rest 468 469 // NOTE: A netlink message will contain multiple header attributes. 470 // Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent 471 // with IFA_ADDRESS being a prefix address and IFA_LOCAL being the 472 // local interface address. We add the local interface address here 473 // and ignore the IFA_ADDRESS. 474 switch ahdr.Type { 475 case linux.IFA_LOCAL: 476 err := stack.AddInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ 477 Family: ifa.Family, 478 PrefixLen: ifa.PrefixLen, 479 Flags: ifa.Flags, 480 Addr: value, 481 }) 482 if linuxerr.Equals(linuxerr.EEXIST, err) { 483 flags := msg.Header().Flags 484 if flags&linux.NLM_F_EXCL != 0 { 485 return syserr.ErrExists 486 } 487 } else if err != nil { 488 return syserr.ErrInvalidArgument 489 } 490 case linux.IFA_ADDRESS: 491 default: 492 return syserr.ErrNotSupported 493 } 494 } 495 return nil 496 } 497 498 // delAddr handles RTM_DELADDR requests. 499 func (p *Protocol) delAddr(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 500 stack := inet.StackFromContext(ctx) 501 if stack == nil { 502 // No network stack. 503 return syserr.ErrProtocolNotSupported 504 } 505 506 var ifa linux.InterfaceAddrMessage 507 attrs, ok := msg.GetData(&ifa) 508 if !ok { 509 return syserr.ErrInvalidArgument 510 } 511 512 for !attrs.Empty() { 513 ahdr, value, rest, ok := attrs.ParseFirst() 514 if !ok { 515 return syserr.ErrInvalidArgument 516 } 517 attrs = rest 518 519 // NOTE: A netlink message will contain multiple header attributes. 520 // Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent 521 // with IFA_ADDRESS being a prefix address and IFA_LOCAL being the 522 // local interface address. We use the local interface address to 523 // remove the address and ignore the IFA_ADDRESS. 524 switch ahdr.Type { 525 case linux.IFA_LOCAL: 526 err := stack.RemoveInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ 527 Family: ifa.Family, 528 PrefixLen: ifa.PrefixLen, 529 Flags: ifa.Flags, 530 Addr: value, 531 }) 532 if err != nil { 533 return syserr.ErrBadLocalAddress 534 } 535 case linux.IFA_ADDRESS: 536 default: 537 return syserr.ErrNotSupported 538 } 539 } 540 541 return nil 542 } 543 544 // ProcessMessage implements netlink.Protocol.ProcessMessage. 545 func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { 546 hdr := msg.Header() 547 548 // All messages start with a 1 byte protocol family. 549 var family primitive.Uint8 550 if _, ok := msg.GetData(&family); !ok { 551 // Linux ignores messages missing the protocol family. See 552 // net/core/rtnetlink.c:rtnetlink_rcv_msg. 553 return nil 554 } 555 556 // Non-GET message types require CAP_NET_ADMIN. 557 if typeKind(hdr.Type) != kindGet { 558 creds := auth.CredentialsFromContext(ctx) 559 if !creds.HasCapability(linux.CAP_NET_ADMIN) { 560 return syserr.ErrPermissionDenied 561 } 562 } 563 564 if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP { 565 // TODO(b/68878065): Only the dump variant of the types below are 566 // supported. 567 switch hdr.Type { 568 case linux.RTM_GETLINK: 569 return p.dumpLinks(ctx, msg, ms) 570 case linux.RTM_GETADDR: 571 return p.dumpAddrs(ctx, msg, ms) 572 case linux.RTM_GETROUTE: 573 return p.dumpRoutes(ctx, msg, ms) 574 default: 575 return syserr.ErrNotSupported 576 } 577 } else if hdr.Flags&linux.NLM_F_REQUEST == linux.NLM_F_REQUEST { 578 switch hdr.Type { 579 case linux.RTM_GETLINK: 580 return p.getLink(ctx, msg, ms) 581 case linux.RTM_DELLINK: 582 return p.delLink(ctx, msg, ms) 583 case linux.RTM_GETROUTE: 584 return p.dumpRoutes(ctx, msg, ms) 585 case linux.RTM_NEWADDR: 586 return p.newAddr(ctx, msg, ms) 587 case linux.RTM_DELADDR: 588 return p.delAddr(ctx, msg, ms) 589 default: 590 return syserr.ErrNotSupported 591 } 592 } 593 return syserr.ErrNotSupported 594 } 595 596 // init registers the NETLINK_ROUTE provider. 597 func init() { 598 netlink.RegisterProvider(linux.NETLINK_ROUTE, NewProtocol) 599 }