github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/loader/vdso.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package loader

import (
	"bytes"
	"debug/elf"
	"fmt"
	"io"

	"github.com/metacubex/gvisor/pkg/abi"
	"github.com/metacubex/gvisor/pkg/context"
	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
	"github.com/metacubex/gvisor/pkg/hostarch"
	"github.com/metacubex/gvisor/pkg/log"
	"github.com/metacubex/gvisor/pkg/safemem"
	"github.com/metacubex/gvisor/pkg/sentry/arch"
	"github.com/metacubex/gvisor/pkg/sentry/loader/vdsodata"
	"github.com/metacubex/gvisor/pkg/sentry/memmap"
	"github.com/metacubex/gvisor/pkg/sentry/mm"
	"github.com/metacubex/gvisor/pkg/sentry/pgalloc"
	"github.com/metacubex/gvisor/pkg/sentry/uniqueid"
	"github.com/metacubex/gvisor/pkg/sentry/usage"
	"github.com/metacubex/gvisor/pkg/usermem"
)

// vdsoPrelink is the address at which the VDSO ELF is prelinked, matching the
// classic x86-64 VDSO load address.
const vdsoPrelink = 0xffffffffff700000

// fileContext wraps a context.Context, overriding Value to serve a fixed
// global unique ID.
type fileContext struct {
	context.Context
}

func (f *fileContext) Value(key any) any {
	switch key {
	case uniqueid.CtxGlobalUniqueID:
		return uint64(0)
	default:
		return f.Context.Value(key)
	}
}
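
// fileContextUniqueIDSketch is an illustrative sketch (not part of the
// original file): any consumer that queries uniqueid.CtxGlobalUniqueID
// through a fileContext receives the fixed value 0, while every other key
// falls through to the wrapped context.
func fileContextUniqueIDSketch(ctx context.Context) uint64 {
	fc := &fileContext{Context: ctx}
	return fc.Value(uniqueid.CtxGlobalUniqueID).(uint64) // Always 0.
}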

// byteFullReader implements fullReader, serving reads from an in-memory byte
// slice.
type byteFullReader struct {
	data []byte
}

// ReadFull implements fullReader.ReadFull.
func (b *byteFullReader) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
	if offset < 0 {
		return 0, linuxerr.EINVAL
	}
	if offset >= int64(len(b.data)) {
		return 0, io.EOF
	}
	n, err := dst.CopyOut(ctx, b.data[offset:])
	return int64(n), err
}
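
// byteFullReaderSketch is an illustrative sketch (not part of the original
// file): it reads the four-byte ELF magic out of a byteFullReader.
// usermem.BytesIOSequence wraps a plain byte slice as an IOSequence, so the
// copy is purely in-memory.
func byteFullReaderSketch(ctx context.Context) ([]byte, error) {
	r := &byteFullReader{data: vdsodata.Binary}
	magic := make([]byte, 4)
	// Offset 0 is the start of the ELF header; the first four bytes are
	// \x7fELF.
	if _, err := r.ReadFull(ctx, usermem.BytesIOSequence(magic), 0); err != nil {
		return nil, err
	}
	return magic, nil
}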

// validateVDSO checks that the VDSO can be loaded by loadVDSO.
//
// VDSOs are special (see below). Since we are going to map the VDSO directly
// rather than using a normal loading process, we require that the PT_LOAD
// segments have the same layout in the ELF as they expect to have in memory.
//
// Namely, this means that we must verify:
//   - PT_LOAD file offsets are equivalent to the memory offset from the first
//     segment.
//   - No extra zeroed space (memsz) is required.
//   - PT_LOAD segments are in order.
//   - No two PT_LOAD segments occupy parts of the same page.
//   - PT_LOAD segments don't extend beyond the end of the file.
//
// ctx may be nil if f does not need it.
func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, error) {
	info, err := parseHeader(ctx, f)
	if err != nil {
		log.Infof("Unable to parse VDSO header: %v", err)
		return elfInfo{}, err
	}

	var first *elf.ProgHeader
	var prev *elf.ProgHeader
	var prevEnd hostarch.Addr
	for i, phdr := range info.phdrs {
		if phdr.Type != elf.PT_LOAD {
			continue
		}

		if first == nil {
			first = &info.phdrs[i]
			if phdr.Off != 0 {
				log.Warningf("First PT_LOAD segment has non-zero file offset")
				return elfInfo{}, linuxerr.ENOEXEC
			}
		}

		memoryOffset := phdr.Vaddr - first.Vaddr
		if memoryOffset != phdr.Off {
			log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off)
			return elfInfo{}, linuxerr.ENOEXEC
		}

		// memsz larger than filesz means that extra zeroed space should be
		// provided at the end of the segment. Since we are mapping the ELF
		// directly, we don't want to just overwrite part of the ELF with
		// zeroes.
		if phdr.Memsz != phdr.Filesz {
			log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz)
			return elfInfo{}, linuxerr.ENOEXEC
		}

		start := hostarch.Addr(memoryOffset)
		end, ok := start.AddLength(phdr.Memsz)
		if !ok {
			log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, phdr.Memsz)
			return elfInfo{}, linuxerr.ENOEXEC
		}
		if uint64(end) > size {
			log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size)
			return elfInfo{}, linuxerr.ENOEXEC
		}

		if prev != nil {
			if start < prevEnd {
				log.Warningf("PT_LOAD segments out of order")
				return elfInfo{}, linuxerr.ENOEXEC
			}

			// We mprotect entire pages, so each segment must be in
			// its own page.
			prevEndPage := prevEnd.RoundDown()
			startPage := start.RoundDown()
			if prevEndPage >= startPage {
				log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage)
				return elfInfo{}, linuxerr.ENOEXEC
			}
		}
		prev = &info.phdrs[i]
		prevEnd = end
	}

	return info, nil
}
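
// vdsoLayoutSketch is an illustrative sketch (not part of the original file):
// it rechecks the core layout invariant enforced by validateVDSO, namely that
// each PT_LOAD segment's file offset equals its memory offset from the first
// PT_LOAD segment, using the standard debug/elf parser instead of the
// loader's parseHeader.
func vdsoLayoutSketch(bin []byte) error {
	f, err := elf.NewFile(bytes.NewReader(bin))
	if err != nil {
		return err
	}
	var first *elf.Prog
	for _, p := range f.Progs {
		if p.Type != elf.PT_LOAD {
			continue
		}
		if first == nil {
			first = p
		}
		// Mapping the file directly is only safe if file and memory
		// layouts agree.
		if memOff := p.Vaddr - first.Vaddr; memOff != p.Off {
			return fmt.Errorf("PT_LOAD at vaddr %#x: memory offset %#x != file offset %#x", p.Vaddr, memOff, p.Off)
		}
	}
	return nil
}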

// VDSO describes a VDSO.
//
// NOTE(mpratt): to support multiple architectures or operating systems, this
// would need to contain a VDSO for each.
//
// +stateify savable
type VDSO struct {
	// ParamPage is the VDSO parameter page. The kernel updates this page to
	// pass timekeeping data to the VDSO.
	ParamPage *mm.SpecialMappable

	// vdso is the VDSO ELF itself.
	vdso *mm.SpecialMappable

	// os is the operating system targeted by the VDSO.
	os abi.OS

	// arch is the architecture targeted by the VDSO.
	arch arch.Arch

	// phdrs are the VDSO ELF phdrs.
	phdrs []elf.ProgHeader `state:".([]elfProgHeader)"`
}

// PrepareVDSO validates the system VDSO and returns a VDSO, containing the
// param page for updating by the kernel.
func PrepareVDSO(mf *pgalloc.MemoryFile) (*VDSO, error) {
	vdsoFile := &byteFullReader{data: vdsodata.Binary}

	// First make sure the VDSO is valid. vdsoFile does not use ctx, so a
	// nil context can be passed.
	info, err := validateVDSO(nil, vdsoFile, uint64(len(vdsodata.Binary)))
	if err != nil {
		return nil, err
	}

	// Then copy it into a VDSO mapping.
	size, ok := hostarch.Addr(len(vdsodata.Binary)).RoundUp()
	if !ok {
		return nil, fmt.Errorf("VDSO size overflows? %#x", len(vdsodata.Binary))
	}

	vdso, err := mf.Allocate(uint64(size), pgalloc.AllocOpts{Kind: usage.System})
	if err != nil {
		return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err)
	}

	ims, err := mf.MapInternal(vdso, hostarch.ReadWrite)
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to map VDSO memory: %v", err)
	}

	_, err = safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(vdsodata.Binary)))
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to copy VDSO into memory: %v", err)
	}

	// Finally, allocate a param page for this VDSO.
	paramPage, err := mf.Allocate(hostarch.PageSize, pgalloc.AllocOpts{Kind: usage.System})
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err)
	}

	return &VDSO{
		ParamPage: mm.NewSpecialMappable("[vvar]", mf, paramPage),
		// TODO(gvisor.dev/issue/157): Don't advertise the VDSO, as
		// some applications may not be able to handle multiple [vdso]
		// hints.
		vdso:  mm.NewSpecialMappable("", mf, vdso),
		os:    info.os,
		arch:  info.arch,
		phdrs: info.phdrs,
	}, nil
}
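
// prepareVDSOSketch is an illustrative sketch (not part of the original
// file) of the PrepareVDSO lifecycle: validate and copy the embedded VDSO
// into kernel-managed memory, use it for task loading, then drop the
// references on both mappings.
func prepareVDSOSketch(ctx context.Context, mf *pgalloc.MemoryFile) error {
	v, err := PrepareVDSO(mf)
	if err != nil {
		return err
	}
	// ... pass v to loadVDSO for each loaded ELF binary ...
	v.Release(ctx)
	return nil
}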

// loadVDSO loads the VDSO into m.
//
// VDSOs are special.
//
// VDSOs are fully position independent. However, instead of loading a VDSO
// like a normal ELF binary, mapping only the PT_LOAD segments, the Linux
// kernel simply directly maps the entire file into process memory, with very
// little real ELF parsing.
//
// NOTE(b/25323870): This means that userspace can, and unfortunately does,
// depend on parts of the ELF that would normally not be mapped.  To maintain
// compatibility with such binaries, we load the VDSO much like Linux.
//
// loadVDSO takes a reference on the VDSO and parameter page FrameRegions.
func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (hostarch.Addr, error) {
	if v.os != bin.os {
		ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os)
		return 0, linuxerr.ENOEXEC
	}
	if v.arch != bin.arch {
		ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch)
		return 0, linuxerr.ENOEXEC
	}

	// Reserve address space for the VDSO and its parameter page, which is
	// mapped just before the VDSO.
	mapSize := v.vdso.Length() + v.ParamPage.Length()
	addr, err := m.MMap(ctx, memmap.MMapOpts{
		Length:  mapSize,
		Private: true,
	})
	if err != nil {
		ctx.Infof("Unable to reserve VDSO address space: %v", err)
		return 0, err
	}

	// Now map the param page.
	_, err = m.MMap(ctx, memmap.MMapOpts{
		Length:          v.ParamPage.Length(),
		MappingIdentity: v.ParamPage,
		Mappable:        v.ParamPage,
		Addr:            addr,
		Fixed:           true,
		Unmap:           true,
		Private:         true,
		Perms:           hostarch.Read,
		MaxPerms:        hostarch.Read,
	})
	if err != nil {
		ctx.Infof("Unable to map VDSO param page: %v", err)
		return 0, err
	}

	// Now map the VDSO itself.
	vdsoAddr, ok := addr.AddLength(v.ParamPage.Length())
	if !ok {
		panic(fmt.Sprintf("Part of mapped range overflows? %#x + %#x", addr, v.ParamPage.Length()))
	}
	_, err = m.MMap(ctx, memmap.MMapOpts{
		Length:          v.vdso.Length(),
		MappingIdentity: v.vdso,
		Mappable:        v.vdso,
		Addr:            vdsoAddr,
		Fixed:           true,
		Unmap:           true,
		Private:         true,
		Perms:           hostarch.Read,
		MaxPerms:        hostarch.AnyAccess,
	})
	if err != nil {
		ctx.Infof("Unable to map VDSO: %v", err)
		return 0, err
	}

	vdsoEnd, ok := vdsoAddr.AddLength(v.vdso.Length())
	if !ok {
		panic(fmt.Sprintf("VDSO mapping overflows? %#x + %#x", vdsoAddr, v.vdso.Length()))
	}

	// Set additional protections for the individual segments.
	var first *elf.ProgHeader
	for i, phdr := range v.phdrs {
		if phdr.Type != elf.PT_LOAD {
			continue
		}

		if first == nil {
			first = &v.phdrs[i]
		}

		memoryOffset := phdr.Vaddr - first.Vaddr
		segAddr, ok := vdsoAddr.AddLength(memoryOffset)
		if !ok {
			ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", vdsoAddr, memoryOffset)
			return 0, linuxerr.ENOEXEC
		}
		segPage := segAddr.RoundDown()
		segSize := hostarch.Addr(phdr.Memsz)
		segSize, ok = segSize.AddLength(segAddr.PageOffset())
		if !ok {
			ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset())
			return 0, linuxerr.ENOEXEC
		}
		segSize, ok = segSize.RoundUp()
		if !ok {
			ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset())
			return 0, linuxerr.ENOEXEC
		}
		segEnd, ok := segPage.AddLength(uint64(segSize))
		if !ok {
			ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segAddr, segSize)
			return 0, linuxerr.ENOEXEC
		}
		if segEnd > vdsoEnd {
			ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd)
			return 0, linuxerr.ENOEXEC
		}

		perms := progFlagsAsPerms(phdr.Flags)
		if perms != hostarch.Read {
			if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil {
				ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err)
				return 0, linuxerr.ENOEXEC
			}
		}
	}

	return vdsoAddr, nil
}
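
// vdsoMappingBounds is an illustrative sketch (not part of the original
// file): given the base address reserved by loadVDSO, it computes the bounds
// of the two mappings, mirroring Linux's ordering of the parameter page
// ([vvar]) immediately before the VDSO itself.
func vdsoMappingBounds(v *VDSO, base hostarch.Addr) (vdsoStart, vdsoEnd hostarch.Addr, ok bool) {
	// The parameter page occupies [base, base+ParamPage.Length()); the
	// VDSO begins where the parameter page ends.
	vdsoStart, ok = base.AddLength(v.ParamPage.Length())
	if !ok {
		return 0, 0, false
	}
	vdsoEnd, ok = vdsoStart.AddLength(v.vdso.Length())
	return vdsoStart, vdsoEnd, ok
}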

// Release drops references on mappings held by v.
func (v *VDSO) Release(ctx context.Context) {
	v.ParamPage.DecRef(ctx)
	v.vdso.DecRef(ctx)
}

// vdsoSigreturnOffset is the offset of the __kernel_rt_sigreturn symbol
// within the embedded VDSO ELF, resolved from its symbol table at package
// init.
var vdsoSigreturnOffset = func() uint64 {
	f, err := elf.NewFile(bytes.NewReader(vdsodata.Binary))
	if err != nil {
		panic(fmt.Sprintf("failed to parse vdso.so as ELF file: %v", err))
	}
	syms, err := f.Symbols()
	if err != nil {
		panic(fmt.Sprintf("failed to read symbols from vdso.so: %v", err))
	}
	const sigreturnSymbol = "__kernel_rt_sigreturn"
	for _, sym := range syms {
		if elf.ST_BIND(sym.Info) != elf.STB_LOCAL && sym.Section != elf.SHN_UNDEF && sym.Name == sigreturnSymbol {
			return sym.Value
		}
	}
	panic(fmt.Sprintf("no symbol %q in vdso.so", sigreturnSymbol))
}()
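
// sigreturnAddrSketch is an illustrative sketch (not part of the original
// file): the absolute address of __kernel_rt_sigreturn in a task's address
// space is the VDSO's load address, as returned by loadVDSO, plus the
// symbol's offset within the ELF.
func sigreturnAddrSketch(vdsoAddr hostarch.Addr) (hostarch.Addr, bool) {
	return vdsoAddr.AddLength(vdsoSigreturnOffset)
}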