github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/loader/vdso.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package loader

import (
	"bytes"
	"debug/elf"
	"fmt"
	"io"

	"github.com/metacubex/gvisor/pkg/abi"
	"github.com/metacubex/gvisor/pkg/context"
	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
	"github.com/metacubex/gvisor/pkg/hostarch"
	"github.com/metacubex/gvisor/pkg/log"
	"github.com/metacubex/gvisor/pkg/safemem"
	"github.com/metacubex/gvisor/pkg/sentry/arch"
	"github.com/metacubex/gvisor/pkg/sentry/loader/vdsodata"
	"github.com/metacubex/gvisor/pkg/sentry/memmap"
	"github.com/metacubex/gvisor/pkg/sentry/mm"
	"github.com/metacubex/gvisor/pkg/sentry/pgalloc"
	"github.com/metacubex/gvisor/pkg/sentry/uniqueid"
	"github.com/metacubex/gvisor/pkg/sentry/usage"
	"github.com/metacubex/gvisor/pkg/usermem"
)

// vdsoPrelink is the address at which the VDSO is prelinked.
const vdsoPrelink = 0xffffffffff700000

// fileContext wraps a context.Context, stubbing out the global unique ID so
// that the VDSO can be parsed outside of a full kernel context.
type fileContext struct {
	context.Context
}

// Value implements context.Context.Value.
func (f *fileContext) Value(key any) any {
	switch key {
	case uniqueid.CtxGlobalUniqueID:
		return uint64(0)
	default:
		return f.Context.Value(key)
	}
}

// byteFullReader implements fullReader for an in-memory byte slice.
type byteFullReader struct {
	data []byte
}

// ReadFull implements fullReader.ReadFull.
func (b *byteFullReader) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
	if offset < 0 {
		return 0, linuxerr.EINVAL
	}
	if offset >= int64(len(b.data)) {
		return 0, io.EOF
	}
	n, err := dst.CopyOut(ctx, b.data[offset:])
	return int64(n), err
}
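// Illustrative sketch (not part of the original file): byteFullReader can be
// driven with any usermem.IOSequence; usermem.BytesIOSequence wraps a plain
// byte slice. The helper name below is hypothetical.
func exampleDrainByteFullReader(ctx context.Context, b *byteFullReader) ([]byte, error) {
	buf := make([]byte, len(b.data))
	n, err := b.ReadFull(ctx, usermem.BytesIOSequence(buf), 0)
	if err != nil && err != io.EOF {
		return nil, err
	}
	return buf[:n], nil
}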
// validateVDSO checks that the VDSO can be loaded by loadVDSO.
//
// VDSOs are special (see below). Since we are going to map the VDSO directly
// rather than using a normal loading process, we require that the PT_LOAD
// segments have the same layout in the ELF as they expect to have in memory.
//
// Namely, this means that we must verify:
//   - PT_LOAD file offsets are equivalent to the memory offset from the
//     first segment.
//   - No extra zeroed space (memsz) is required.
//   - PT_LOAD segments are in order.
//   - No two PT_LOAD segments occupy parts of the same page.
//   - PT_LOAD segments don't extend beyond the end of the file.
//
// ctx may be nil if f does not need it.
func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, error) {
	info, err := parseHeader(ctx, f)
	if err != nil {
		log.Infof("Unable to parse VDSO header: %v", err)
		return elfInfo{}, err
	}

	var first *elf.ProgHeader
	var prev *elf.ProgHeader
	var prevEnd hostarch.Addr
	for i, phdr := range info.phdrs {
		if phdr.Type != elf.PT_LOAD {
			continue
		}

		if first == nil {
			first = &info.phdrs[i]
			if phdr.Off != 0 {
				log.Warningf("First PT_LOAD segment has non-zero file offset")
				return elfInfo{}, linuxerr.ENOEXEC
			}
		}

		memoryOffset := phdr.Vaddr - first.Vaddr
		if memoryOffset != phdr.Off {
			log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off)
			return elfInfo{}, linuxerr.ENOEXEC
		}

		// memsz larger than filesz means that extra zeroed space should be
		// provided at the end of the segment. Since we are mapping the ELF
		// directly, we don't want to just overwrite part of the ELF with
		// zeroes.
		if phdr.Memsz != phdr.Filesz {
			log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz)
			return elfInfo{}, linuxerr.ENOEXEC
		}

		start := hostarch.Addr(memoryOffset)
		end, ok := start.AddLength(phdr.Memsz)
		if !ok {
			log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, phdr.Memsz)
			return elfInfo{}, linuxerr.ENOEXEC
		}
		if uint64(end) > size {
			log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size)
			return elfInfo{}, linuxerr.ENOEXEC
		}

		if prev != nil {
			if start < prevEnd {
				log.Warningf("PT_LOAD segments out of order")
				return elfInfo{}, linuxerr.ENOEXEC
			}

			// We mprotect entire pages, so each segment must be in
			// its own page.
			prevEndPage := prevEnd.RoundDown()
			startPage := start.RoundDown()
			if prevEndPage >= startPage {
				log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage)
				return elfInfo{}, linuxerr.ENOEXEC
			}
		}
		prev = &info.phdrs[i]
		prevEnd = end
	}

	return info, nil
}
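// Illustrative sketch (assumption, not in the original file): a minimal
// program-header layout that satisfies validateVDSO's checks, assuming 4 KiB
// pages and a sufficiently large file. File offsets match memory offsets from
// the first segment, memsz equals filesz, segments are in order, and no two
// segments share a page.
var exampleValidVDSOPhdrs = []elf.ProgHeader{
	{Type: elf.PT_LOAD, Off: 0x0, Vaddr: 0xffffffffff700000, Filesz: 0xf00, Memsz: 0xf00},
	{Type: elf.PT_LOAD, Off: 0x1000, Vaddr: 0xffffffffff701000, Filesz: 0x800, Memsz: 0x800},
}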
// VDSO describes a VDSO.
//
// NOTE(mpratt): to support multiple architectures or operating systems, this
// would need to contain a VDSO for each.
//
// +stateify savable
type VDSO struct {
	// ParamPage is the VDSO parameter page. The kernel updates this page to
	// expose timekeeping data to the VDSO.
	ParamPage *mm.SpecialMappable

	// vdso is the VDSO ELF itself.
	vdso *mm.SpecialMappable

	// os is the operating system targeted by the VDSO.
	os abi.OS

	// arch is the architecture targeted by the VDSO.
	arch arch.Arch

	// phdrs are the VDSO ELF phdrs.
	phdrs []elf.ProgHeader `state:".([]elfProgHeader)"`
}

// PrepareVDSO validates the system VDSO and returns a VDSO, containing the
// param page to be updated by the kernel.
func PrepareVDSO(mf *pgalloc.MemoryFile) (*VDSO, error) {
	vdsoFile := &byteFullReader{data: vdsodata.Binary}

	// First make sure the VDSO is valid. vdsoFile does not use ctx, so a
	// nil context can be passed.
	info, err := validateVDSO(nil, vdsoFile, uint64(len(vdsodata.Binary)))
	if err != nil {
		return nil, err
	}

	// Then copy it into a VDSO mapping.
	size, ok := hostarch.Addr(len(vdsodata.Binary)).RoundUp()
	if !ok {
		return nil, fmt.Errorf("VDSO size overflows? %#x", len(vdsodata.Binary))
	}

	vdso, err := mf.Allocate(uint64(size), pgalloc.AllocOpts{Kind: usage.System})
	if err != nil {
		return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err)
	}

	ims, err := mf.MapInternal(vdso, hostarch.ReadWrite)
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to map VDSO memory: %v", err)
	}

	_, err = safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(vdsodata.Binary)))
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to copy VDSO into memory: %v", err)
	}

	// Finally, allocate a param page for this VDSO.
	paramPage, err := mf.Allocate(hostarch.PageSize, pgalloc.AllocOpts{Kind: usage.System})
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err)
	}

	return &VDSO{
		ParamPage: mm.NewSpecialMappable("[vvar]", mf, paramPage),
		// TODO(gvisor.dev/issue/157): Don't advertise the VDSO, as
		// some applications may not be able to handle multiple [vdso]
		// hints.
		vdso:  mm.NewSpecialMappable("", mf, vdso),
		os:    info.os,
		arch:  info.arch,
		phdrs: info.phdrs,
	}, nil
}
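// Address-space sketch (illustrative, not in the original file): loadVDSO
// (below) reserves one contiguous range and places the parameter page
// immediately before the VDSO image, mirroring Linux's [vvar]/[vdso] layout:
//
//	addr                     addr + ParamPage.Length()              vdsoEnd
//	|---- [vvar] param page ----|---------- [vdso] ELF image ----------|
//
// loadVDSO performs this arithmetic inline; the helper name below is
// hypothetical.
func exampleVDSOBase(addr hostarch.Addr, v *VDSO) (hostarch.Addr, bool) {
	return addr.AddLength(v.ParamPage.Length())
}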
// loadVDSO loads the VDSO into m.
//
// VDSOs are special.
//
// VDSOs are fully position independent. However, instead of loading a VDSO
// like a normal ELF binary, mapping only the PT_LOAD segments, the Linux
// kernel simply directly maps the entire file into process memory, with very
// little real ELF parsing.
//
// NOTE(b/25323870): This means that userspace can, and unfortunately does,
// depend on parts of the ELF that would normally not be mapped. To maintain
// compatibility with such binaries, we load the VDSO much like Linux.
//
// loadVDSO takes a reference on the VDSO and parameter page FrameRegions.
func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (hostarch.Addr, error) {
	if v.os != bin.os {
		ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os)
		return 0, linuxerr.ENOEXEC
	}
	if v.arch != bin.arch {
		ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch)
		return 0, linuxerr.ENOEXEC
	}

	// Reserve address space for the VDSO and its parameter page, which is
	// mapped just before the VDSO.
	mapSize := v.vdso.Length() + v.ParamPage.Length()
	addr, err := m.MMap(ctx, memmap.MMapOpts{
		Length:  mapSize,
		Private: true,
	})
	if err != nil {
		ctx.Infof("Unable to reserve VDSO address space: %v", err)
		return 0, err
	}

	// Now map the param page.
	_, err = m.MMap(ctx, memmap.MMapOpts{
		Length:          v.ParamPage.Length(),
		MappingIdentity: v.ParamPage,
		Mappable:        v.ParamPage,
		Addr:            addr,
		Fixed:           true,
		Unmap:           true,
		Private:         true,
		Perms:           hostarch.Read,
		MaxPerms:        hostarch.Read,
	})
	if err != nil {
		ctx.Infof("Unable to map VDSO param page: %v", err)
		return 0, err
	}

	// Now map the VDSO itself.
	vdsoAddr, ok := addr.AddLength(v.ParamPage.Length())
	if !ok {
		panic(fmt.Sprintf("Part of mapped range overflows? %#x + %#x", addr, v.ParamPage.Length()))
	}
	_, err = m.MMap(ctx, memmap.MMapOpts{
		Length:          v.vdso.Length(),
		MappingIdentity: v.vdso,
		Mappable:        v.vdso,
		Addr:            vdsoAddr,
		Fixed:           true,
		Unmap:           true,
		Private:         true,
		Perms:           hostarch.Read,
		MaxPerms:        hostarch.AnyAccess,
	})
	if err != nil {
		ctx.Infof("Unable to map VDSO: %v", err)
		return 0, err
	}

	vdsoEnd, ok := vdsoAddr.AddLength(v.vdso.Length())
	if !ok {
		panic(fmt.Sprintf("VDSO mapping overflows? %#x + %#x", vdsoAddr, v.vdso.Length()))
	}

	// Set additional protections for the individual segments.
	var first *elf.ProgHeader
	for i, phdr := range v.phdrs {
		if phdr.Type != elf.PT_LOAD {
			continue
		}

		if first == nil {
			first = &v.phdrs[i]
		}

		memoryOffset := phdr.Vaddr - first.Vaddr
		segAddr, ok := vdsoAddr.AddLength(memoryOffset)
		if !ok {
			ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", vdsoAddr, memoryOffset)
			return 0, linuxerr.ENOEXEC
		}
		segPage := segAddr.RoundDown()
		segSize := hostarch.Addr(phdr.Memsz)
		segSize, ok = segSize.AddLength(segAddr.PageOffset())
		if !ok {
			ctx.Warningf("PT_LOAD segment memsz %#x + page offset %#x overflows", phdr.Memsz, segAddr.PageOffset())
			return 0, linuxerr.ENOEXEC
		}
		segSize, ok = segSize.RoundUp()
		if !ok {
			ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset())
			return 0, linuxerr.ENOEXEC
		}
		segEnd, ok := segPage.AddLength(uint64(segSize))
		if !ok {
			ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segAddr, segSize)
			return 0, linuxerr.ENOEXEC
		}
		if segEnd > vdsoEnd {
			ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd)
			return 0, linuxerr.ENOEXEC
		}

		perms := progFlagsAsPerms(phdr.Flags)
		if perms != hostarch.Read {
			if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil {
				ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err)
				return 0, linuxerr.ENOEXEC
			}
		}
	}

	return vdsoAddr, nil
}

// Release drops references on mappings held by v.
func (v *VDSO) Release(ctx context.Context) {
	v.ParamPage.DecRef(ctx)
	v.vdso.DecRef(ctx)
}

// vdsoSigreturnOffset is the offset of __kernel_rt_sigreturn within the VDSO,
// computed once from the embedded vdso.so's symbol table.
var vdsoSigreturnOffset = func() uint64 {
	f, err := elf.NewFile(bytes.NewReader(vdsodata.Binary))
	if err != nil {
		panic(fmt.Sprintf("failed to parse vdso.so as ELF file: %v", err))
	}
	syms, err := f.Symbols()
	if err != nil {
		panic(fmt.Sprintf("failed to read symbols from vdso.so: %v", err))
	}
	const sigreturnSymbol = "__kernel_rt_sigreturn"
	for _, sym := range syms {
		if elf.ST_BIND(sym.Info) != elf.STB_LOCAL && sym.Section != elf.SHN_UNDEF && sym.Name == sigreturnSymbol {
			return sym.Value
		}
	}
	panic(fmt.Sprintf("no symbol %q in vdso.so", sigreturnSymbol))
}()
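// Illustrative sketch (not part of the original file): callers that need the
// absolute address of the sigreturn trampoline can add vdsoSigreturnOffset to
// the base address returned by loadVDSO. The helper name is hypothetical.
func exampleSigreturnAddr(vdsoBase hostarch.Addr) (hostarch.Addr, bool) {
	return vdsoBase.AddLength(vdsoSigreturnOffset)
}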