github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/loader/vdso.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package loader

import (
	"bytes"
	"debug/elf"
	"fmt"
	"io"

	"github.com/MerlinKodo/gvisor/pkg/abi"
	"github.com/MerlinKodo/gvisor/pkg/context"
	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
	"github.com/MerlinKodo/gvisor/pkg/hostarch"
	"github.com/MerlinKodo/gvisor/pkg/log"
	"github.com/MerlinKodo/gvisor/pkg/safemem"
	"github.com/MerlinKodo/gvisor/pkg/sentry/arch"
	"github.com/MerlinKodo/gvisor/pkg/sentry/loader/vdsodata"
	"github.com/MerlinKodo/gvisor/pkg/sentry/memmap"
	"github.com/MerlinKodo/gvisor/pkg/sentry/mm"
	"github.com/MerlinKodo/gvisor/pkg/sentry/pgalloc"
	"github.com/MerlinKodo/gvisor/pkg/sentry/uniqueid"
	"github.com/MerlinKodo/gvisor/pkg/sentry/usage"
	"github.com/MerlinKodo/gvisor/pkg/usermem"
)

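// vdsoPrelink is the base address at which the VDSO ELF is expected to be
// prelinked. (Descriptive comment added here; the interpretation follows from
// the constant's name and is an assumption.)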
const vdsoPrelink = 0xffffffffff700000

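// fileContext wraps a context.Context, overriding Value so that lookups of
// uniqueid.CtxGlobalUniqueID return a fixed value of zero.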
type fileContext struct {
	context.Context
}

func (f *fileContext) Value(key any) any {
	switch key {
	case uniqueid.CtxGlobalUniqueID:
		return uint64(0)
	default:
		return f.Context.Value(key)
	}
}

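// byteFullReader implements fullReader over an in-memory byte slice.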
type byteFullReader struct {
	data []byte
}

// ReadFull implements fullReader.ReadFull.
func (b *byteFullReader) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
	if offset < 0 {
		return 0, linuxerr.EINVAL
	}
	if offset >= int64(len(b.data)) {
		return 0, io.EOF
	}
	n, err := dst.CopyOut(ctx, b.data[offset:])
	return int64(n), err
}
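
// byteFullReaderExample is a hypothetical sketch (not part of the original
// file) illustrating the ReadFull contract above: bytes are copied out of the
// backing slice starting at offset, and a read past the end returns io.EOF.
// usermem.BytesIOSequence wraps a plain byte slice as the destination.
func byteFullReaderExample(ctx context.Context) ([]byte, error) {
	r := &byteFullReader{data: []byte{0x7f, 'E', 'L', 'F'}}
	buf := make([]byte, 4)
	n, err := r.ReadFull(ctx, usermem.BytesIOSequence(buf), 0)
	return buf[:n], err
}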

// validateVDSO checks that the VDSO can be loaded by loadVDSO.
//
// VDSOs are special (see below). Since we are going to map the VDSO directly
// rather than using a normal loading process, we require that the PT_LOAD
// segments have the same layout in the ELF as they expect to have in memory.
//
// Namely, this means that we must verify:
//   - PT_LOAD file offsets are equivalent to the memory offset from the first
//     segment.
//   - No extra zeroed space (memsz) is required.
//   - PT_LOAD segments are in order.
//   - No two PT_LOAD segments occupy parts of the same page.
//   - PT_LOAD segments don't extend beyond the end of the file.
//
// ctx may be nil if f does not need it.
func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, error) {
	info, err := parseHeader(ctx, f)
	if err != nil {
		log.Infof("Unable to parse VDSO header: %v", err)
		return elfInfo{}, err
	}

	var first *elf.ProgHeader
	var prev *elf.ProgHeader
	var prevEnd hostarch.Addr
	for i, phdr := range info.phdrs {
		if phdr.Type != elf.PT_LOAD {
			continue
		}

		if first == nil {
			first = &info.phdrs[i]
			if phdr.Off != 0 {
				log.Warningf("First PT_LOAD segment has non-zero file offset")
				return elfInfo{}, linuxerr.ENOEXEC
			}
		}

		memoryOffset := phdr.Vaddr - first.Vaddr
		if memoryOffset != phdr.Off {
			log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off)
			return elfInfo{}, linuxerr.ENOEXEC
		}

		// memsz larger than filesz means that extra zeroed space should be
		// provided at the end of the segment. Since we are mapping the ELF
		// directly, we don't want to just overwrite part of the ELF with
		// zeroes.
		if phdr.Memsz != phdr.Filesz {
			log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz)
			return elfInfo{}, linuxerr.ENOEXEC
		}

		start := hostarch.Addr(memoryOffset)
		end, ok := start.AddLength(phdr.Memsz)
		if !ok {
			log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, phdr.Memsz)
			return elfInfo{}, linuxerr.ENOEXEC
		}
		if uint64(end) > size {
			log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size)
			return elfInfo{}, linuxerr.ENOEXEC
		}

		if prev != nil {
			if start < prevEnd {
				log.Warningf("PT_LOAD segments out of order")
				return elfInfo{}, linuxerr.ENOEXEC
			}

			// We mprotect entire pages, so each segment must be in
			// its own page.
			prevEndPage := prevEnd.RoundDown()
			startPage := start.RoundDown()
			if prevEndPage >= startPage {
				log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage)
				return elfInfo{}, linuxerr.ENOEXEC
			}
		}
		prev = &info.phdrs[i]
		prevEnd = end
	}

	return info, nil
}
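
// segmentsSharePage is a hypothetical helper (not in the original file) that
// restates the page-overlap rule enforced above: protections are applied to
// whole pages, so a segment may not begin on the page on which the previous
// segment ends.
func segmentsSharePage(prevEnd, start hostarch.Addr) bool {
	return prevEnd.RoundDown() >= start.RoundDown()
}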

// VDSO describes a VDSO.
//
// NOTE(mpratt): to support multiple architectures or operating systems, this
// would need to contain a VDSO for each.
//
// +stateify savable
type VDSO struct {
	// ParamPage is the VDSO parameter page. This page should be kept
	// updated with timekeeping data for the VDSO to read.
	ParamPage *mm.SpecialMappable

	// vdso is the VDSO ELF itself.
	vdso *mm.SpecialMappable

	// os is the operating system targeted by the VDSO.
	os abi.OS

	// arch is the architecture targeted by the VDSO.
	arch arch.Arch

	// phdrs are the VDSO ELF phdrs.
	phdrs []elf.ProgHeader `state:".([]elfProgHeader)"`
}

// PrepareVDSO validates the system VDSO and returns a VDSO containing the
// param page for the kernel to update.
func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
	vdsoFile := &byteFullReader{data: vdsodata.Binary}

	// First make sure the VDSO is valid. vdsoFile does not use ctx, so a
	// nil context can be passed.
	info, err := validateVDSO(nil, vdsoFile, uint64(len(vdsodata.Binary)))
	if err != nil {
		return nil, err
	}

	// Then copy it into a VDSO mapping.
	size, ok := hostarch.Addr(len(vdsodata.Binary)).RoundUp()
	if !ok {
		return nil, fmt.Errorf("VDSO size overflows? %#x", len(vdsodata.Binary))
	}

	mf := mfp.MemoryFile()
	vdso, err := mf.Allocate(uint64(size), pgalloc.AllocOpts{Kind: usage.System})
	if err != nil {
		return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err)
	}

	ims, err := mf.MapInternal(vdso, hostarch.ReadWrite)
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to map VDSO memory: %v", err)
	}

	_, err = safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(vdsodata.Binary)))
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to copy VDSO into memory: %v", err)
	}

	// Finally, allocate a param page for this VDSO.
	paramPage, err := mf.Allocate(hostarch.PageSize, pgalloc.AllocOpts{Kind: usage.System})
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err)
	}

	return &VDSO{
		ParamPage: mm.NewSpecialMappable("[vvar]", mfp, paramPage),
		// TODO(gvisor.dev/issue/157): Don't advertise the VDSO, as
		// some applications may not be able to handle multiple [vdso]
		// hints.
		vdso:  mm.NewSpecialMappable("", mfp, vdso),
		os:    info.os,
		arch:  info.arch,
		phdrs: info.phdrs,
	}, nil
}
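
// prepareVDSOExample is a hypothetical sketch (not part of the original file)
// of typical use: materialize the VDSO once at kernel construction time and
// drop its references on shutdown. A real kernel would retain v and keep
// v.ParamPage updated with timekeeping data rather than releasing it here.
func prepareVDSOExample(ctx context.Context, mfp pgalloc.MemoryFileProvider) error {
	v, err := PrepareVDSO(mfp)
	if err != nil {
		return fmt.Errorf("preparing VDSO: %w", err)
	}
	// Released immediately only to keep the sketch self-contained.
	v.Release(ctx)
	return nil
}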

// loadVDSO loads the VDSO into m.
//
// VDSOs are special.
//
// VDSOs are fully position independent. However, instead of loading a VDSO
// like a normal ELF binary, mapping only the PT_LOAD segments, the Linux
// kernel simply directly maps the entire file into process memory, with very
// little real ELF parsing.
//
// NOTE(b/25323870): This means that userspace can, and unfortunately does,
// depend on parts of the ELF that would normally not be mapped.  To maintain
// compatibility with such binaries, we load the VDSO much like Linux.
//
// loadVDSO takes a reference on the VDSO and parameter page FrameRegions.
func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (hostarch.Addr, error) {
	if v.os != bin.os {
		ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os)
		return 0, linuxerr.ENOEXEC
	}
	if v.arch != bin.arch {
		ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch)
		return 0, linuxerr.ENOEXEC
	}

	// Reserve address space for the VDSO and its parameter page, which is
	// mapped just before the VDSO.
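	//
	// A rough illustration of the layout established below:
	//
	//	addr               addr + ParamPage.Length()        addr + mapSize
	//	|----- [vvar] -----|------------ [vdso] ------------|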
	mapSize := v.vdso.Length() + v.ParamPage.Length()
	addr, err := m.MMap(ctx, memmap.MMapOpts{
		Length:  mapSize,
		Private: true,
	})
	if err != nil {
		ctx.Infof("Unable to reserve VDSO address space: %v", err)
		return 0, err
	}

	// Now map the param page.
	_, err = m.MMap(ctx, memmap.MMapOpts{
		Length:          v.ParamPage.Length(),
		MappingIdentity: v.ParamPage,
		Mappable:        v.ParamPage,
		Addr:            addr,
		Fixed:           true,
		Unmap:           true,
		Private:         true,
		Perms:           hostarch.Read,
		MaxPerms:        hostarch.Read,
	})
	if err != nil {
		ctx.Infof("Unable to map VDSO param page: %v", err)
		return 0, err
	}

	// Now map the VDSO itself.
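	// The mapping starts read-only, but MaxPerms is AnyAccess because
	// individual PT_LOAD segments are mprotected below (e.g. to make the
	// text segment executable).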
	vdsoAddr, ok := addr.AddLength(v.ParamPage.Length())
	if !ok {
		panic(fmt.Sprintf("Part of mapped range overflows? %#x + %#x", addr, v.ParamPage.Length()))
	}
	_, err = m.MMap(ctx, memmap.MMapOpts{
		Length:          v.vdso.Length(),
		MappingIdentity: v.vdso,
		Mappable:        v.vdso,
		Addr:            vdsoAddr,
		Fixed:           true,
		Unmap:           true,
		Private:         true,
		Perms:           hostarch.Read,
		MaxPerms:        hostarch.AnyAccess,
	})
	if err != nil {
		ctx.Infof("Unable to map VDSO: %v", err)
		return 0, err
	}

	vdsoEnd, ok := vdsoAddr.AddLength(v.vdso.Length())
	if !ok {
		panic(fmt.Sprintf("VDSO mapping overflows? %#x + %#x", vdsoAddr, v.vdso.Length()))
	}

	// Set additional protections for the individual segments.
	var first *elf.ProgHeader
	for i, phdr := range v.phdrs {
		if phdr.Type != elf.PT_LOAD {
			continue
		}

		if first == nil {
			first = &v.phdrs[i]
		}

		memoryOffset := phdr.Vaddr - first.Vaddr
		segAddr, ok := vdsoAddr.AddLength(memoryOffset)
		if !ok {
			ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", vdsoAddr, memoryOffset)
			return 0, linuxerr.ENOEXEC
		}
		segPage := segAddr.RoundDown()
		segSize := hostarch.Addr(phdr.Memsz)
		segSize, ok = segSize.AddLength(segAddr.PageOffset())
		if !ok {
			ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset())
			return 0, linuxerr.ENOEXEC
		}
		segSize, ok = segSize.RoundUp()
		if !ok {
			ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset())
			return 0, linuxerr.ENOEXEC
		}
		segEnd, ok := segPage.AddLength(uint64(segSize))
		if !ok {
			ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segPage, segSize)
			return 0, linuxerr.ENOEXEC
		}
		if segEnd > vdsoEnd {
			ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd)
			return 0, linuxerr.ENOEXEC
		}

		perms := progFlagsAsPerms(phdr.Flags)
		if perms != hostarch.Read {
			if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil {
				ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err)
				return 0, linuxerr.ENOEXEC
			}
		}
	}

	return vdsoAddr, nil
}

// Release drops references on mappings held by v.
func (v *VDSO) Release(ctx context.Context) {
	v.ParamPage.DecRef(ctx)
	v.vdso.DecRef(ctx)
}

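// vdsoSigreturnOffset is the offset of the __kernel_rt_sigreturn symbol from
// the VDSO's base address, extracted from the embedded vdso.so at
// initialization time.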
var vdsoSigreturnOffset = func() uint64 {
	f, err := elf.NewFile(bytes.NewReader(vdsodata.Binary))
	if err != nil {
		panic(fmt.Sprintf("failed to parse vdso.so as ELF file: %v", err))
	}
	syms, err := f.Symbols()
	if err != nil {
		panic(fmt.Sprintf("failed to read symbols from vdso.so: %v", err))
	}
	const sigreturnSymbol = "__kernel_rt_sigreturn"
	for _, sym := range syms {
		if elf.ST_BIND(sym.Info) != elf.STB_LOCAL && sym.Section != elf.SHN_UNDEF && sym.Name == sigreturnSymbol {
			return sym.Value
		}
	}
	panic(fmt.Sprintf("no symbol %q in vdso.so", sigreturnSymbol))
}()
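
// vdsoSigreturnAddr is a hypothetical helper (not in the original file)
// showing how the offset computed above would combine with a loaded VDSO's
// base address (as returned by loadVDSO) to locate the signal-return
// trampoline for a task.
func vdsoSigreturnAddr(vdsoBase hostarch.Addr) (hostarch.Addr, bool) {
	return vdsoBase.AddLength(vdsoSigreturnOffset)
}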