github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/loader/vdso.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package loader 16 17 import ( 18 "bytes" 19 "debug/elf" 20 "fmt" 21 "io" 22 23 "github.com/MerlinKodo/gvisor/pkg/abi" 24 "github.com/MerlinKodo/gvisor/pkg/context" 25 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 26 "github.com/MerlinKodo/gvisor/pkg/hostarch" 27 "github.com/MerlinKodo/gvisor/pkg/log" 28 "github.com/MerlinKodo/gvisor/pkg/safemem" 29 "github.com/MerlinKodo/gvisor/pkg/sentry/arch" 30 "github.com/MerlinKodo/gvisor/pkg/sentry/loader/vdsodata" 31 "github.com/MerlinKodo/gvisor/pkg/sentry/memmap" 32 "github.com/MerlinKodo/gvisor/pkg/sentry/mm" 33 "github.com/MerlinKodo/gvisor/pkg/sentry/pgalloc" 34 "github.com/MerlinKodo/gvisor/pkg/sentry/uniqueid" 35 "github.com/MerlinKodo/gvisor/pkg/sentry/usage" 36 "github.com/MerlinKodo/gvisor/pkg/usermem" 37 ) 38 39 const vdsoPrelink = 0xffffffffff700000 40 41 type fileContext struct { 42 context.Context 43 } 44 45 func (f *fileContext) Value(key any) any { 46 switch key { 47 case uniqueid.CtxGlobalUniqueID: 48 return uint64(0) 49 default: 50 return f.Context.Value(key) 51 } 52 } 53 54 type byteFullReader struct { 55 data []byte 56 } 57 58 // ReadFull implements fullReader.ReadFull. 59 func (b *byteFullReader) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { 60 if offset < 0 { 61 return 0, linuxerr.EINVAL 62 } 63 if offset >= int64(len(b.data)) { 64 return 0, io.EOF 65 } 66 n, err := dst.CopyOut(ctx, b.data[offset:]) 67 return int64(n), err 68 } 69 70 // validateVDSO checks that the VDSO can be loaded by loadVDSO. 71 // 72 // VDSOs are special (see below). Since we are going to map the VDSO directly 73 // rather than using a normal loading process, we require that the PT_LOAD 74 // segments have the same layout in the ELF as they expect to have in memory. 75 // 76 // Namely, this means that we must verify: 77 // - PT_LOAD file offsets are equivalent to the memory offset from the first 78 // segment. 79 // - No extra zeroed space (memsz) is required. 80 // - PT_LOAD segments are in order. 81 // - No two PT_LOAD segments occupy parts of the same page. 82 // - PT_LOAD segments don't extend beyond the end of the file. 83 // 84 // ctx may be nil if f does not need it. 85 func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, error) { 86 info, err := parseHeader(ctx, f) 87 if err != nil { 88 log.Infof("Unable to parse VDSO header: %v", err) 89 return elfInfo{}, err 90 } 91 92 var first *elf.ProgHeader 93 var prev *elf.ProgHeader 94 var prevEnd hostarch.Addr 95 for i, phdr := range info.phdrs { 96 if phdr.Type != elf.PT_LOAD { 97 continue 98 } 99 100 if first == nil { 101 first = &info.phdrs[i] 102 if phdr.Off != 0 { 103 log.Warningf("First PT_LOAD segment has non-zero file offset") 104 return elfInfo{}, linuxerr.ENOEXEC 105 } 106 } 107 108 memoryOffset := phdr.Vaddr - first.Vaddr 109 if memoryOffset != phdr.Off { 110 log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off) 111 return elfInfo{}, linuxerr.ENOEXEC 112 } 113 114 // memsz larger than filesz means that extra zeroed space should be 115 // provided at the end of the segment. Since we are mapping the ELF 116 // directly, we don't want to just overwrite part of the ELF with 117 // zeroes. 118 if phdr.Memsz != phdr.Filesz { 119 log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz) 120 return elfInfo{}, linuxerr.ENOEXEC 121 } 122 123 start := hostarch.Addr(memoryOffset) 124 end, ok := start.AddLength(phdr.Memsz) 125 if !ok { 126 log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, end) 127 return elfInfo{}, linuxerr.ENOEXEC 128 } 129 if uint64(end) > size { 130 log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size) 131 return elfInfo{}, linuxerr.ENOEXEC 132 } 133 134 if prev != nil { 135 if start < prevEnd { 136 log.Warningf("PT_LOAD segments out of order") 137 return elfInfo{}, linuxerr.ENOEXEC 138 } 139 140 // We mprotect entire pages, so each segment must be in 141 // its own page. 142 prevEndPage := prevEnd.RoundDown() 143 startPage := start.RoundDown() 144 if prevEndPage >= startPage { 145 log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage) 146 return elfInfo{}, linuxerr.ENOEXEC 147 } 148 } 149 prev = &info.phdrs[i] 150 prevEnd = end 151 } 152 153 return info, nil 154 } 155 156 // VDSO describes a VDSO. 157 // 158 // NOTE(mpratt): to support multiple architectures or operating systems, this 159 // would need to contain a VDSO for each. 160 // 161 // +stateify savable 162 type VDSO struct { 163 // ParamPage is the VDSO parameter page. This page should be updated to 164 // inform the VDSO for timekeeping data. 165 ParamPage *mm.SpecialMappable 166 167 // vdso is the VDSO ELF itself. 168 vdso *mm.SpecialMappable 169 170 // os is the operating system targeted by the VDSO. 171 os abi.OS 172 173 // arch is the architecture targeted by the VDSO. 174 arch arch.Arch 175 176 // phdrs are the VDSO ELF phdrs. 177 phdrs []elf.ProgHeader `state:".([]elfProgHeader)"` 178 } 179 180 // PrepareVDSO validates the system VDSO and returns a VDSO, containing the 181 // param page for updating by the kernel. 182 func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) { 183 vdsoFile := &byteFullReader{data: vdsodata.Binary} 184 185 // First make sure the VDSO is valid. vdsoFile does not use ctx, so a 186 // nil context can be passed. 187 info, err := validateVDSO(nil, vdsoFile, uint64(len(vdsodata.Binary))) 188 if err != nil { 189 return nil, err 190 } 191 192 // Then copy it into a VDSO mapping. 193 size, ok := hostarch.Addr(len(vdsodata.Binary)).RoundUp() 194 if !ok { 195 return nil, fmt.Errorf("VDSO size overflows? %#x", len(vdsodata.Binary)) 196 } 197 198 mf := mfp.MemoryFile() 199 vdso, err := mf.Allocate(uint64(size), pgalloc.AllocOpts{Kind: usage.System}) 200 if err != nil { 201 return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err) 202 } 203 204 ims, err := mf.MapInternal(vdso, hostarch.ReadWrite) 205 if err != nil { 206 mf.DecRef(vdso) 207 return nil, fmt.Errorf("unable to map VDSO memory: %v", err) 208 } 209 210 _, err = safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(vdsodata.Binary))) 211 if err != nil { 212 mf.DecRef(vdso) 213 return nil, fmt.Errorf("unable to copy VDSO into memory: %v", err) 214 } 215 216 // Finally, allocate a param page for this VDSO. 217 paramPage, err := mf.Allocate(hostarch.PageSize, pgalloc.AllocOpts{Kind: usage.System}) 218 if err != nil { 219 mf.DecRef(vdso) 220 return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err) 221 } 222 223 return &VDSO{ 224 ParamPage: mm.NewSpecialMappable("[vvar]", mfp, paramPage), 225 // TODO(gvisor.dev/issue/157): Don't advertise the VDSO, as 226 // some applications may not be able to handle multiple [vdso] 227 // hints. 228 vdso: mm.NewSpecialMappable("", mfp, vdso), 229 os: info.os, 230 arch: info.arch, 231 phdrs: info.phdrs, 232 }, nil 233 } 234 235 // loadVDSO loads the VDSO into m. 236 // 237 // VDSOs are special. 238 // 239 // VDSOs are fully position independent. However, instead of loading a VDSO 240 // like a normal ELF binary, mapping only the PT_LOAD segments, the Linux 241 // kernel simply directly maps the entire file into process memory, with very 242 // little real ELF parsing. 243 // 244 // NOTE(b/25323870): This means that userspace can, and unfortunately does, 245 // depend on parts of the ELF that would normally not be mapped. To maintain 246 // compatibility with such binaries, we load the VDSO much like Linux. 247 // 248 // loadVDSO takes a reference on the VDSO and parameter page FrameRegions. 249 func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (hostarch.Addr, error) { 250 if v.os != bin.os { 251 ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os) 252 return 0, linuxerr.ENOEXEC 253 } 254 if v.arch != bin.arch { 255 ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch) 256 return 0, linuxerr.ENOEXEC 257 } 258 259 // Reserve address space for the VDSO and its parameter page, which is 260 // mapped just before the VDSO. 261 mapSize := v.vdso.Length() + v.ParamPage.Length() 262 addr, err := m.MMap(ctx, memmap.MMapOpts{ 263 Length: mapSize, 264 Private: true, 265 }) 266 if err != nil { 267 ctx.Infof("Unable to reserve VDSO address space: %v", err) 268 return 0, err 269 } 270 271 // Now map the param page. 272 _, err = m.MMap(ctx, memmap.MMapOpts{ 273 Length: v.ParamPage.Length(), 274 MappingIdentity: v.ParamPage, 275 Mappable: v.ParamPage, 276 Addr: addr, 277 Fixed: true, 278 Unmap: true, 279 Private: true, 280 Perms: hostarch.Read, 281 MaxPerms: hostarch.Read, 282 }) 283 if err != nil { 284 ctx.Infof("Unable to map VDSO param page: %v", err) 285 return 0, err 286 } 287 288 // Now map the VDSO itself. 289 vdsoAddr, ok := addr.AddLength(v.ParamPage.Length()) 290 if !ok { 291 panic(fmt.Sprintf("Part of mapped range overflows? %#x + %#x", addr, v.ParamPage.Length())) 292 } 293 _, err = m.MMap(ctx, memmap.MMapOpts{ 294 Length: v.vdso.Length(), 295 MappingIdentity: v.vdso, 296 Mappable: v.vdso, 297 Addr: vdsoAddr, 298 Fixed: true, 299 Unmap: true, 300 Private: true, 301 Perms: hostarch.Read, 302 MaxPerms: hostarch.AnyAccess, 303 }) 304 if err != nil { 305 ctx.Infof("Unable to map VDSO: %v", err) 306 return 0, err 307 } 308 309 vdsoEnd, ok := vdsoAddr.AddLength(v.vdso.Length()) 310 if !ok { 311 panic(fmt.Sprintf("VDSO mapping overflows? %#x + %#x", vdsoAddr, v.vdso.Length())) 312 } 313 314 // Set additional protections for the individual segments. 315 var first *elf.ProgHeader 316 for i, phdr := range v.phdrs { 317 if phdr.Type != elf.PT_LOAD { 318 continue 319 } 320 321 if first == nil { 322 first = &v.phdrs[i] 323 } 324 325 memoryOffset := phdr.Vaddr - first.Vaddr 326 segAddr, ok := vdsoAddr.AddLength(memoryOffset) 327 if !ok { 328 ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", segAddr, memoryOffset) 329 return 0, linuxerr.ENOEXEC 330 } 331 segPage := segAddr.RoundDown() 332 segSize := hostarch.Addr(phdr.Memsz) 333 segSize, ok = segSize.AddLength(segAddr.PageOffset()) 334 if !ok { 335 ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset()) 336 return 0, linuxerr.ENOEXEC 337 } 338 segSize, ok = segSize.RoundUp() 339 if !ok { 340 ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset()) 341 return 0, linuxerr.ENOEXEC 342 } 343 segEnd, ok := segPage.AddLength(uint64(segSize)) 344 if !ok { 345 ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segAddr, segSize) 346 return 0, linuxerr.ENOEXEC 347 } 348 if segEnd > vdsoEnd { 349 ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd) 350 return 0, linuxerr.ENOEXEC 351 } 352 353 perms := progFlagsAsPerms(phdr.Flags) 354 if perms != hostarch.Read { 355 if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil { 356 ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err) 357 return 0, linuxerr.ENOEXEC 358 } 359 } 360 } 361 362 return vdsoAddr, nil 363 } 364 365 // Release drops references on mappings held by v. 366 func (v *VDSO) Release(ctx context.Context) { 367 v.ParamPage.DecRef(ctx) 368 v.vdso.DecRef(ctx) 369 } 370 371 var vdsoSigreturnOffset = func() uint64 { 372 f, err := elf.NewFile(bytes.NewReader(vdsodata.Binary)) 373 if err != nil { 374 panic(fmt.Sprintf("failed to parse vdso.so as ELF file: %v", err)) 375 } 376 syms, err := f.Symbols() 377 if err != nil { 378 panic(fmt.Sprintf("failed to read symbols from vdso.so: %v", err)) 379 } 380 const sigreturnSymbol = "__kernel_rt_sigreturn" 381 for _, sym := range syms { 382 if elf.ST_BIND(sym.Info) != elf.STB_LOCAL && sym.Section != elf.SHN_UNDEF && sym.Name == sigreturnSymbol { 383 return sym.Value 384 } 385 } 386 panic(fmt.Sprintf("no symbol %q in vdso.so", sigreturnSymbol)) 387 }()