gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/socket/netlink/route/protocol.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package route provides a NETLINK_ROUTE socket protocol.
    16  package route
    17  
    18  import (
    19  	"bytes"
    20  
    21  	"gvisor.dev/gvisor/pkg/abi/linux"
    22  	"gvisor.dev/gvisor/pkg/context"
    23  	"gvisor.dev/gvisor/pkg/errors/linuxerr"
    24  	"gvisor.dev/gvisor/pkg/marshal/primitive"
    25  	"gvisor.dev/gvisor/pkg/sentry/inet"
    26  	"gvisor.dev/gvisor/pkg/sentry/kernel"
    27  	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
    28  	"gvisor.dev/gvisor/pkg/sentry/socket/netlink"
    29  	"gvisor.dev/gvisor/pkg/sentry/socket/netlink/nlmsg"
    30  	"gvisor.dev/gvisor/pkg/syserr"
    31  )
    32  
    33  // commandKind describes the operational class of a message type.
    34  //
    35  // The route message types use the lower 2 bits of the type to describe class
    36  // of command.
    37  type commandKind int
    38  
    39  const (
    40  	kindNew commandKind = 0x0
    41  	kindDel commandKind = 0x1
    42  	kindGet commandKind = 0x2
    43  	kindSet commandKind = 0x3
    44  )
    45  
    46  func typeKind(typ uint16) commandKind {
    47  	return commandKind(typ & 0x3)
    48  }
    49  
    50  // Protocol implements netlink.Protocol.
    51  //
    52  // +stateify savable
    53  type Protocol struct{}
    54  
    55  var _ netlink.Protocol = (*Protocol)(nil)
    56  
    57  // NewProtocol creates a NETLINK_ROUTE netlink.Protocol.
    58  func NewProtocol(t *kernel.Task) (netlink.Protocol, *syserr.Error) {
    59  	return &Protocol{}, nil
    60  }
    61  
    62  // Protocol implements netlink.Protocol.Protocol.
    63  func (p *Protocol) Protocol() int {
    64  	return linux.NETLINK_ROUTE
    65  }
    66  
    67  // CanSend implements netlink.Protocol.CanSend.
    68  func (p *Protocol) CanSend() bool {
    69  	return true
    70  }
    71  
    72  // dumpLinks handles RTM_GETLINK dump requests.
    73  func (p *Protocol) dumpLinks(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error {
    74  	// NLM_F_DUMP + RTM_GETLINK messages are supposed to include an
    75  	// ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some
    76  	// userspace applications (including glibc) still include rtgenmsg.
    77  	// Linux has a workaround based on the total message length.
    78  	//
    79  	// We don't bother to check for either, since we don't support any
    80  	// extra attributes that may be included anyways.
    81  	//
    82  	// The message may also contain netlink attribute IFLA_EXT_MASK, which
    83  	// we don't support.
    84  
    85  	// The RTM_GETLINK dump response is a set of messages each containing
    86  	// an InterfaceInfoMessage followed by a set of netlink attributes.
    87  
    88  	// We always send back an NLMSG_DONE.
    89  	ms.Multi = true
    90  
    91  	stack := inet.StackFromContext(ctx)
    92  	if stack == nil {
    93  		// No network devices.
    94  		return nil
    95  	}
    96  
    97  	for idx, i := range stack.Interfaces() {
    98  		addNewLinkMessage(ms, idx, i)
    99  	}
   100  
   101  	return nil
   102  }
   103  
   104  // getLinks handles RTM_GETLINK requests.
   105  func (p *Protocol) getLink(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error {
   106  	stack := inet.StackFromContext(ctx)
   107  	if stack == nil {
   108  		// No network devices.
   109  		return nil
   110  	}
   111  
   112  	// Parse message.
   113  	var ifi linux.InterfaceInfoMessage
   114  	attrs, ok := msg.GetData(&ifi)
   115  	if !ok {
   116  		return syserr.ErrInvalidArgument
   117  	}
   118  
   119  	// Parse attributes.
   120  	var byName []byte
   121  	for !attrs.Empty() {
   122  		ahdr, value, rest, ok := attrs.ParseFirst()
   123  		if !ok {
   124  			return syserr.ErrInvalidArgument
   125  		}
   126  		attrs = rest
   127  
   128  		switch ahdr.Type {
   129  		case linux.IFLA_IFNAME:
   130  			if len(value) < 1 {
   131  				return syserr.ErrInvalidArgument
   132  			}
   133  			byName = value[:len(value)-1]
   134  
   135  			// TODO(gvisor.dev/issue/578): Support IFLA_EXT_MASK.
   136  		}
   137  	}
   138  
   139  	found := false
   140  	for idx, i := range stack.Interfaces() {
   141  		switch {
   142  		case ifi.Index > 0:
   143  			if idx != ifi.Index {
   144  				continue
   145  			}
   146  		case byName != nil:
   147  			if string(byName) != i.Name {
   148  				continue
   149  			}
   150  		default:
   151  			// Criteria not specified.
   152  			return syserr.ErrInvalidArgument
   153  		}
   154  
   155  		addNewLinkMessage(ms, idx, i)
   156  		found = true
   157  		break
   158  	}
   159  	if !found {
   160  		return syserr.ErrNoDevice
   161  	}
   162  	return nil
   163  }
   164  
   165  func (p *Protocol) newLink(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error {
   166  	stack := inet.StackFromContext(ctx)
   167  	if stack == nil {
   168  		// No network stack.
   169  		return syserr.ErrProtocolNotSupported
   170  	}
   171  
   172  	return stack.SetInterface(ctx, msg)
   173  }
   174  
   175  // delLink handles RTM_DELLINK requests.
   176  func (p *Protocol) delLink(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error {
   177  	stack := inet.StackFromContext(ctx)
   178  	if stack == nil {
   179  		// No network stack.
   180  		return syserr.ErrProtocolNotSupported
   181  	}
   182  
   183  	var ifinfomsg linux.InterfaceInfoMessage
   184  	attrs, ok := msg.GetData(&ifinfomsg)
   185  	if !ok {
   186  		return syserr.ErrInvalidArgument
   187  	}
   188  	if ifinfomsg.Index == 0 {
   189  		// The index is unspecified, search by the interface name.
   190  		ahdr, value, _, ok := attrs.ParseFirst()
   191  		if !ok {
   192  			return syserr.ErrInvalidArgument
   193  		}
   194  		switch ahdr.Type {
   195  		case linux.IFLA_IFNAME:
   196  			if len(value) < 1 {
   197  				return syserr.ErrInvalidArgument
   198  			}
   199  			ifname := string(value[:len(value)-1])
   200  			for idx, ifa := range stack.Interfaces() {
   201  				if ifname == ifa.Name {
   202  					ifinfomsg.Index = idx
   203  					break
   204  				}
   205  			}
   206  		default:
   207  			return syserr.ErrInvalidArgument
   208  		}
   209  		if ifinfomsg.Index == 0 {
   210  			return syserr.ErrNoDevice
   211  		}
   212  	}
   213  	return syserr.FromError(stack.RemoveInterface(ifinfomsg.Index))
   214  }
   215  
   216  // addNewLinkMessage appends RTM_NEWLINK message for the given interface into
   217  // the message set.
   218  func addNewLinkMessage(ms *nlmsg.MessageSet, idx int32, i inet.Interface) {
   219  	m := ms.AddMessage(linux.NetlinkMessageHeader{
   220  		Type: linux.RTM_NEWLINK,
   221  	})
   222  
   223  	m.Put(&linux.InterfaceInfoMessage{
   224  		Family: linux.AF_UNSPEC,
   225  		Type:   i.DeviceType,
   226  		Index:  idx,
   227  		Flags:  i.Flags,
   228  	})
   229  
   230  	m.PutAttrString(linux.IFLA_IFNAME, i.Name)
   231  	m.PutAttr(linux.IFLA_MTU, primitive.AllocateUint32(i.MTU))
   232  
   233  	mac := make([]byte, 6)
   234  	brd := mac
   235  	if len(i.Addr) > 0 {
   236  		mac = i.Addr
   237  		brd = bytes.Repeat([]byte{0xff}, len(i.Addr))
   238  	}
   239  	m.PutAttr(linux.IFLA_ADDRESS, primitive.AsByteSlice(mac))
   240  	m.PutAttr(linux.IFLA_BROADCAST, primitive.AsByteSlice(brd))
   241  
   242  	// TODO(gvisor.dev/issue/578): There are many more attributes.
   243  }
   244  
   245  // dumpAddrs handles RTM_GETADDR dump requests.
   246  func (p *Protocol) dumpAddrs(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error {
   247  	// RTM_GETADDR dump requests need not contain anything more than the
   248  	// netlink header and 1 byte protocol family common to all
   249  	// NETLINK_ROUTE requests.
   250  	//
   251  	// TODO(b/68878065): Filter output by passed protocol family.
   252  
   253  	// The RTM_GETADDR dump response is a set of RTM_NEWADDR messages each
   254  	// containing an InterfaceAddrMessage followed by a set of netlink
   255  	// attributes.
   256  
   257  	// We always send back an NLMSG_DONE.
   258  	ms.Multi = true
   259  
   260  	stack := inet.StackFromContext(ctx)
   261  	if stack == nil {
   262  		// No network devices.
   263  		return nil
   264  	}
   265  
   266  	for id, as := range stack.InterfaceAddrs() {
   267  		for _, a := range as {
   268  			m := ms.AddMessage(linux.NetlinkMessageHeader{
   269  				Type: linux.RTM_NEWADDR,
   270  			})
   271  
   272  			m.Put(&linux.InterfaceAddrMessage{
   273  				Family:    a.Family,
   274  				PrefixLen: a.PrefixLen,
   275  				Index:     uint32(id),
   276  			})
   277  
   278  			addr := primitive.ByteSlice([]byte(a.Addr))
   279  			m.PutAttr(linux.IFA_LOCAL, &addr)
   280  			m.PutAttr(linux.IFA_ADDRESS, &addr)
   281  
   282  			// TODO(gvisor.dev/issue/578): There are many more attributes.
   283  		}
   284  	}
   285  
   286  	return nil
   287  }
   288  
   289  // commonPrefixLen reports the length of the longest IP address prefix.
   290  // This is a simplified version from Golang's src/net/addrselect.go.
   291  func commonPrefixLen(a, b []byte) (cpl int) {
   292  	for len(a) > 0 {
   293  		if a[0] == b[0] {
   294  			cpl += 8
   295  			a = a[1:]
   296  			b = b[1:]
   297  			continue
   298  		}
   299  		bits := 8
   300  		ab, bb := a[0], b[0]
   301  		for {
   302  			ab >>= 1
   303  			bb >>= 1
   304  			bits--
   305  			if ab == bb {
   306  				cpl += bits
   307  				return
   308  			}
   309  		}
   310  	}
   311  	return
   312  }
   313  
   314  // fillRoute returns the Route using LPM algorithm. Refer to Linux's
   315  // net/ipv4/route.c:rt_fill_info().
   316  func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) {
   317  	family := uint8(linux.AF_INET)
   318  	if len(addr) != 4 {
   319  		family = linux.AF_INET6
   320  	}
   321  
   322  	idx := -1    // Index of the Route rule to be returned.
   323  	idxDef := -1 // Index of the default route rule.
   324  	prefix := 0  // Current longest prefix.
   325  	for i, route := range routes {
   326  		if route.Family != family {
   327  			continue
   328  		}
   329  
   330  		if len(route.GatewayAddr) > 0 && route.DstLen == 0 {
   331  			idxDef = i
   332  			continue
   333  		}
   334  
   335  		cpl := commonPrefixLen(addr, route.DstAddr)
   336  		if cpl < int(route.DstLen) {
   337  			continue
   338  		}
   339  		cpl = int(route.DstLen)
   340  		if cpl > prefix {
   341  			idx = i
   342  			prefix = cpl
   343  		}
   344  	}
   345  	if idx == -1 {
   346  		idx = idxDef
   347  	}
   348  	if idx == -1 {
   349  		return inet.Route{}, syserr.ErrHostUnreachable
   350  	}
   351  
   352  	route := routes[idx]
   353  	if family == linux.AF_INET {
   354  		route.DstLen = 32
   355  	} else {
   356  		route.DstLen = 128
   357  	}
   358  	route.DstAddr = addr
   359  	route.Flags |= linux.RTM_F_CLONED // This route is cloned.
   360  	return route, nil
   361  }
   362  
   363  // parseForDestination parses a message as format of RouteMessage-RtAttr-dst.
   364  func parseForDestination(msg *nlmsg.Message) ([]byte, *syserr.Error) {
   365  	var rtMsg linux.RouteMessage
   366  	attrs, ok := msg.GetData(&rtMsg)
   367  	if !ok {
   368  		return nil, syserr.ErrInvalidArgument
   369  	}
   370  	// iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See
   371  	// commit bc234301af12. Note we don't check this flag for backward
   372  	// compatibility.
   373  	if rtMsg.Flags != 0 && rtMsg.Flags != linux.RTM_F_LOOKUP_TABLE {
   374  		return nil, syserr.ErrNotSupported
   375  	}
   376  
   377  	// Expect first attribute is RTA_DST.
   378  	if hdr, value, _, ok := attrs.ParseFirst(); ok && hdr.Type == linux.RTA_DST {
   379  		return value, nil
   380  	}
   381  	return nil, syserr.ErrInvalidArgument
   382  }
   383  
   384  // dumpRoutes handles RTM_GETROUTE requests.
   385  func (p *Protocol) dumpRoutes(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error {
   386  	// RTM_GETROUTE dump requests need not contain anything more than the
   387  	// netlink header and 1 byte protocol family common to all
   388  	// NETLINK_ROUTE requests.
   389  
   390  	stack := inet.StackFromContext(ctx)
   391  	if stack == nil {
   392  		// No network routes.
   393  		return nil
   394  	}
   395  
   396  	hdr := msg.Header()
   397  	routeTables := stack.RouteTable()
   398  
   399  	if hdr.Flags == linux.NLM_F_REQUEST {
   400  		dst, err := parseForDestination(msg)
   401  		if err != nil {
   402  			return err
   403  		}
   404  		route, err := fillRoute(routeTables, dst)
   405  		if err != nil {
   406  			// TODO(gvisor.dev/issue/1237): return NLMSG_ERROR with ENETUNREACH.
   407  			return syserr.ErrNotSupported
   408  		}
   409  		routeTables = append([]inet.Route{}, route)
   410  	} else if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP {
   411  		// We always send back an NLMSG_DONE.
   412  		ms.Multi = true
   413  	} else {
   414  		// TODO(b/68878065): Only above cases are supported.
   415  		return syserr.ErrNotSupported
   416  	}
   417  
   418  	for _, rt := range routeTables {
   419  		m := ms.AddMessage(linux.NetlinkMessageHeader{
   420  			Type: linux.RTM_NEWROUTE,
   421  		})
   422  
   423  		m.Put(&linux.RouteMessage{
   424  			Family: rt.Family,
   425  			DstLen: rt.DstLen,
   426  			SrcLen: rt.SrcLen,
   427  			TOS:    rt.TOS,
   428  
   429  			// Always return the main table since we don't have multiple
   430  			// routing tables.
   431  			Table:    linux.RT_TABLE_MAIN,
   432  			Protocol: rt.Protocol,
   433  			Scope:    rt.Scope,
   434  			Type:     rt.Type,
   435  
   436  			Flags: rt.Flags,
   437  		})
   438  
   439  		m.PutAttr(254, primitive.AsByteSlice([]byte{123}))
   440  		if rt.DstLen > 0 {
   441  			m.PutAttr(linux.RTA_DST, primitive.AsByteSlice(rt.DstAddr))
   442  		}
   443  		if rt.SrcLen > 0 {
   444  			m.PutAttr(linux.RTA_SRC, primitive.AsByteSlice(rt.SrcAddr))
   445  		}
   446  		if rt.OutputInterface != 0 {
   447  			m.PutAttr(linux.RTA_OIF, primitive.AllocateInt32(rt.OutputInterface))
   448  		}
   449  		if len(rt.GatewayAddr) > 0 {
   450  			m.PutAttr(linux.RTA_GATEWAY, primitive.AsByteSlice(rt.GatewayAddr))
   451  		}
   452  
   453  		// TODO(gvisor.dev/issue/578): There are many more attributes.
   454  	}
   455  
   456  	return nil
   457  }
   458  
   459  // newAddr handles RTM_NEWADDR requests.
   460  func (p *Protocol) newAddr(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error {
   461  	stack := inet.StackFromContext(ctx)
   462  	if stack == nil {
   463  		// No network stack.
   464  		return syserr.ErrProtocolNotSupported
   465  	}
   466  
   467  	var ifa linux.InterfaceAddrMessage
   468  	attrs, ok := msg.GetData(&ifa)
   469  	if !ok {
   470  		return syserr.ErrInvalidArgument
   471  	}
   472  
   473  	for !attrs.Empty() {
   474  		ahdr, value, rest, ok := attrs.ParseFirst()
   475  		if !ok {
   476  			return syserr.ErrInvalidArgument
   477  		}
   478  		attrs = rest
   479  
   480  		// NOTE: A netlink message will contain multiple header attributes.
   481  		// Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent
   482  		// with IFA_ADDRESS being a prefix address and IFA_LOCAL being the
   483  		// local interface address. We add the local interface address here
   484  		// and ignore the IFA_ADDRESS.
   485  		switch ahdr.Type {
   486  		case linux.IFA_LOCAL:
   487  			err := stack.AddInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{
   488  				Family:    ifa.Family,
   489  				PrefixLen: ifa.PrefixLen,
   490  				Flags:     ifa.Flags,
   491  				Addr:      value,
   492  			})
   493  			if linuxerr.Equals(linuxerr.EEXIST, err) {
   494  				flags := msg.Header().Flags
   495  				if flags&linux.NLM_F_EXCL != 0 {
   496  					return syserr.ErrExists
   497  				}
   498  			} else if err != nil {
   499  				return syserr.ErrInvalidArgument
   500  			}
   501  		case linux.IFA_ADDRESS:
   502  		default:
   503  			return syserr.ErrNotSupported
   504  		}
   505  	}
   506  	return nil
   507  }
   508  
   509  // delAddr handles RTM_DELADDR requests.
   510  func (p *Protocol) delAddr(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error {
   511  	stack := inet.StackFromContext(ctx)
   512  	if stack == nil {
   513  		// No network stack.
   514  		return syserr.ErrProtocolNotSupported
   515  	}
   516  
   517  	var ifa linux.InterfaceAddrMessage
   518  	attrs, ok := msg.GetData(&ifa)
   519  	if !ok {
   520  		return syserr.ErrInvalidArgument
   521  	}
   522  
   523  	for !attrs.Empty() {
   524  		ahdr, value, rest, ok := attrs.ParseFirst()
   525  		if !ok {
   526  			return syserr.ErrInvalidArgument
   527  		}
   528  		attrs = rest
   529  
   530  		// NOTE: A netlink message will contain multiple header attributes.
   531  		// Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent
   532  		// with IFA_ADDRESS being a prefix address and IFA_LOCAL being the
   533  		// local interface address. We use the local interface address to
   534  		// remove the address and ignore the IFA_ADDRESS.
   535  		switch ahdr.Type {
   536  		case linux.IFA_LOCAL:
   537  			err := stack.RemoveInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{
   538  				Family:    ifa.Family,
   539  				PrefixLen: ifa.PrefixLen,
   540  				Flags:     ifa.Flags,
   541  				Addr:      value,
   542  			})
   543  			if err != nil {
   544  				return syserr.ErrBadLocalAddress
   545  			}
   546  		case linux.IFA_ADDRESS:
   547  		default:
   548  			return syserr.ErrNotSupported
   549  		}
   550  	}
   551  
   552  	return nil
   553  }
   554  
   555  // ProcessMessage implements netlink.Protocol.ProcessMessage.
   556  func (p *Protocol) ProcessMessage(ctx context.Context, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error {
   557  	hdr := msg.Header()
   558  
   559  	// All messages start with a 1 byte protocol family.
   560  	var family primitive.Uint8
   561  	if _, ok := msg.GetData(&family); !ok {
   562  		// Linux ignores messages missing the protocol family. See
   563  		// net/core/rtnetlink.c:rtnetlink_rcv_msg.
   564  		return nil
   565  	}
   566  
   567  	// Non-GET message types require CAP_NET_ADMIN.
   568  	if typeKind(hdr.Type) != kindGet {
   569  		creds := auth.CredentialsFromContext(ctx)
   570  		if !creds.HasCapability(linux.CAP_NET_ADMIN) {
   571  			return syserr.ErrPermissionDenied
   572  		}
   573  	}
   574  
   575  	if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP {
   576  		// TODO(b/68878065): Only the dump variant of the types below are
   577  		// supported.
   578  		switch hdr.Type {
   579  		case linux.RTM_GETLINK:
   580  			return p.dumpLinks(ctx, msg, ms)
   581  		case linux.RTM_GETADDR:
   582  			return p.dumpAddrs(ctx, msg, ms)
   583  		case linux.RTM_GETROUTE:
   584  			return p.dumpRoutes(ctx, msg, ms)
   585  		default:
   586  			return syserr.ErrNotSupported
   587  		}
   588  	} else if hdr.Flags&linux.NLM_F_REQUEST == linux.NLM_F_REQUEST {
   589  		switch hdr.Type {
   590  		case linux.RTM_NEWLINK:
   591  			return p.newLink(ctx, msg, ms)
   592  		case linux.RTM_GETLINK:
   593  			return p.getLink(ctx, msg, ms)
   594  		case linux.RTM_DELLINK:
   595  			return p.delLink(ctx, msg, ms)
   596  		case linux.RTM_GETROUTE:
   597  			return p.dumpRoutes(ctx, msg, ms)
   598  		case linux.RTM_NEWADDR:
   599  			return p.newAddr(ctx, msg, ms)
   600  		case linux.RTM_DELADDR:
   601  			return p.delAddr(ctx, msg, ms)
   602  		default:
   603  			return syserr.ErrNotSupported
   604  		}
   605  	}
   606  	return syserr.ErrNotSupported
   607  }
   608  
// init registers the NETLINK_ROUTE provider, associating linux.NETLINK_ROUTE
// with the NewProtocol constructor in the netlink package.
func init() {
	netlink.RegisterProvider(linux.NETLINK_ROUTE, NewProtocol)
}