github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/loader/vdso.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package loader

import (
	"bytes"
	"debug/elf"
	"fmt"
	"io"
	"strings"

	"github.com/SagerNet/gvisor/pkg/abi"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/log"
	"github.com/SagerNet/gvisor/pkg/safemem"
	"github.com/SagerNet/gvisor/pkg/sentry/arch"
	"github.com/SagerNet/gvisor/pkg/sentry/loader/vdsodata"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sentry/mm"
	"github.com/SagerNet/gvisor/pkg/sentry/pgalloc"
	"github.com/SagerNet/gvisor/pkg/sentry/uniqueid"
	"github.com/SagerNet/gvisor/pkg/sentry/usage"
	"github.com/SagerNet/gvisor/pkg/syserror"
	"github.com/SagerNet/gvisor/pkg/usermem"
)

const vdsoPrelink = 0xffffffffff700000

type fileContext struct {
	context.Context
}

func (f *fileContext) Value(key interface{}) interface{} {
	switch key {
	case uniqueid.CtxGlobalUniqueID:
		return uint64(0)
	default:
		return f.Context.Value(key)
	}
}

type byteFullReader struct {
	data []byte
}

func (b *byteFullReader) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
	if offset < 0 {
		return 0, linuxerr.EINVAL
	}
	if offset >= int64(len(b.data)) {
		return 0, io.EOF
	}
	n, err := dst.CopyOut(ctx, b.data[offset:])
	return int64(n), err
}
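// Compile-time check (a sketch): byteFullReader must implement the
// fullReader interface accepted by validateVDSO below. This assumes
// fullReader is declared elsewhere in this package and consists only of the
// ReadFull method shown above, as its use in validateVDSO implies.
var _ fullReader = (*byteFullReader)(nil)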
// validateVDSO checks that the VDSO can be loaded by loadVDSO.
//
// VDSOs are special (see below). Since we are going to map the VDSO directly
// rather than using a normal loading process, we require that the PT_LOAD
// segments have the same layout in the ELF as they expect to have in memory.
//
// Namely, this means that we must verify:
// * PT_LOAD file offsets are equivalent to the memory offset from the first
//   segment.
// * No extra zeroed space (memsz) is required.
// * PT_LOAD segments are in order.
// * No two PT_LOAD segments occupy parts of the same page.
// * PT_LOAD segments don't extend beyond the end of the file.
//
// ctx may be nil if f does not need it.
func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, error) {
	info, err := parseHeader(ctx, f)
	if err != nil {
		log.Infof("Unable to parse VDSO header: %v", err)
		return elfInfo{}, err
	}

	var first *elf.ProgHeader
	var prev *elf.ProgHeader
	var prevEnd hostarch.Addr
	for i, phdr := range info.phdrs {
		if phdr.Type != elf.PT_LOAD {
			continue
		}

		if first == nil {
			first = &info.phdrs[i]
			if phdr.Off != 0 {
				log.Warningf("First PT_LOAD segment has non-zero file offset")
				return elfInfo{}, syserror.ENOEXEC
			}
		}

		memoryOffset := phdr.Vaddr - first.Vaddr
		if memoryOffset != phdr.Off {
			log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off)
			return elfInfo{}, syserror.ENOEXEC
		}

		// memsz larger than filesz means that extra zeroed space should be
		// provided at the end of the segment. Since we are mapping the ELF
		// directly, we don't want to just overwrite part of the ELF with
		// zeroes.
		if phdr.Memsz != phdr.Filesz {
			log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz)
			return elfInfo{}, syserror.ENOEXEC
		}

		start := hostarch.Addr(memoryOffset)
		end, ok := start.AddLength(phdr.Memsz)
		if !ok {
			log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, phdr.Memsz)
			return elfInfo{}, syserror.ENOEXEC
		}
		if uint64(end) > size {
			log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size)
			return elfInfo{}, syserror.ENOEXEC
		}

		if prev != nil {
			if start < prevEnd {
				log.Warningf("PT_LOAD segments out of order")
				return elfInfo{}, syserror.ENOEXEC
			}

			// We mprotect entire pages, so each segment must be in
			// its own page.
			prevEndPage := prevEnd.RoundDown()
			startPage := start.RoundDown()
			if prevEndPage >= startPage {
				log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage)
				return elfInfo{}, syserror.ENOEXEC
			}
		}
		prev = &info.phdrs[i]
		prevEnd = end
	}

	return info, nil
}

// VDSO describes a VDSO.
//
// NOTE(mpratt): to support multiple architectures or operating systems, this
// would need to contain a VDSO for each.
//
// +stateify savable
type VDSO struct {
	// ParamPage is the VDSO parameter page. The kernel updates this page
	// to pass timekeeping data to the VDSO.
	ParamPage *mm.SpecialMappable

	// vdso is the VDSO ELF itself.
	vdso *mm.SpecialMappable

	// os is the operating system targeted by the VDSO.
	os abi.OS

	// arch is the architecture targeted by the VDSO.
	arch arch.Arch

	// phdrs are the VDSO ELF phdrs.
	phdrs []elf.ProgHeader `state:".([]elfProgHeader)"`
}

// getSymbolValueFromVDSO returns the value of the first non-local, defined
// symbol in vdso.so whose name contains symbol.
func getSymbolValueFromVDSO(symbol string) (uint64, error) {
	f, err := elf.NewFile(bytes.NewReader(vdsodata.Binary))
	if err != nil {
		return 0, err
	}
	syms, err := f.Symbols()
	if err != nil {
		return 0, err
	}

	for _, sym := range syms {
		if elf.ST_BIND(sym.Info) != elf.STB_LOCAL && sym.Section != elf.SHN_UNDEF {
			if strings.Contains(sym.Name, symbol) {
				return sym.Value, nil
			}
		}
	}
	return 0, fmt.Errorf("no %v in vdso.so", symbol)
}
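// Usage sketch (hypothetical caller; the symbol name below is an assumption,
// not taken from this file): because the lookup above uses a substring match,
// a query such as
//
//	offset, err := getSymbolValueFromVDSO("rt_sigreturn")
//
// would match an exported name like "__kernel_rt_sigreturn" or
// "__vdso_rt_sigreturn", whichever the target architecture's VDSO defines.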
// PrepareVDSO validates the system VDSO and returns a VDSO containing the
// param page for the kernel to update.
func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
	vdsoFile := &byteFullReader{data: vdsodata.Binary}

	// First make sure the VDSO is valid. vdsoFile does not use ctx, so a
	// nil context can be passed.
	info, err := validateVDSO(nil, vdsoFile, uint64(len(vdsodata.Binary)))
	if err != nil {
		return nil, err
	}

	// Then copy it into a VDSO mapping.
	size, ok := hostarch.Addr(len(vdsodata.Binary)).RoundUp()
	if !ok {
		return nil, fmt.Errorf("VDSO size overflows? %#x", len(vdsodata.Binary))
	}

	mf := mfp.MemoryFile()
	vdso, err := mf.Allocate(uint64(size), usage.System)
	if err != nil {
		return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err)
	}

	ims, err := mf.MapInternal(vdso, hostarch.ReadWrite)
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to map VDSO memory: %v", err)
	}

	_, err = safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(vdsodata.Binary)))
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to copy VDSO into memory: %v", err)
	}

	// Finally, allocate a param page for this VDSO.
	paramPage, err := mf.Allocate(hostarch.PageSize, usage.System)
	if err != nil {
		mf.DecRef(vdso)
		return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err)
	}

	return &VDSO{
		ParamPage: mm.NewSpecialMappable("[vvar]", mfp, paramPage),
		// TODO(github.com/SagerNet/issue/157): Don't advertise the VDSO, as
		// some applications may not be able to handle multiple [vdso]
		// hints.
		vdso:  mm.NewSpecialMappable("", mfp, vdso),
		os:    info.os,
		arch:  info.arch,
		phdrs: info.phdrs,
	}, nil
}

// loadVDSO loads the VDSO into m.
//
// VDSOs are special.
//
// VDSOs are fully position independent. However, instead of loading a VDSO
// like a normal ELF binary, mapping only the PT_LOAD segments, the Linux
// kernel simply directly maps the entire file into process memory, with very
// little real ELF parsing.
//
// NOTE(b/25323870): This means that userspace can, and unfortunately does,
// depend on parts of the ELF that would normally not be mapped. To maintain
// compatibility with such binaries, we load the VDSO much like Linux.
//
// loadVDSO takes a reference on the VDSO and parameter page FrameRegions.
func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (hostarch.Addr, error) {
	if v.os != bin.os {
		ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os)
		return 0, syserror.ENOEXEC
	}
	if v.arch != bin.arch {
		ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch)
		return 0, syserror.ENOEXEC
	}

	// Reserve address space for the VDSO and its parameter page, which is
	// mapped just before the VDSO.
	mapSize := v.vdso.Length() + v.ParamPage.Length()
	addr, err := m.MMap(ctx, memmap.MMapOpts{
		Length:  mapSize,
		Private: true,
	})
	if err != nil {
		ctx.Infof("Unable to reserve VDSO address space: %v", err)
		return 0, err
	}
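	// Layout of the reservation made above (a sketch, derived from the
	// mappings below):
	//
	//	addr                           [vvar]  ParamPage (one page)
	//	addr + ParamPage.Length()      [vdso]  the VDSO ELF image
	//	addr + mapSize                 end of reservation
	//
	// Both fixed mappings below land inside this reservation, so their
	// Unmap: true only replaces pages this function just reserved.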
	// Now map the param page.
	_, err = m.MMap(ctx, memmap.MMapOpts{
		Length:          v.ParamPage.Length(),
		MappingIdentity: v.ParamPage,
		Mappable:        v.ParamPage,
		Addr:            addr,
		Fixed:           true,
		Unmap:           true,
		Private:         true,
		Perms:           hostarch.Read,
		MaxPerms:        hostarch.Read,
	})
	if err != nil {
		ctx.Infof("Unable to map VDSO param page: %v", err)
		return 0, err
	}

	// Now map the VDSO itself.
	vdsoAddr, ok := addr.AddLength(v.ParamPage.Length())
	if !ok {
		panic(fmt.Sprintf("Part of mapped range overflows? %#x + %#x", addr, v.ParamPage.Length()))
	}
	_, err = m.MMap(ctx, memmap.MMapOpts{
		Length:          v.vdso.Length(),
		MappingIdentity: v.vdso,
		Mappable:        v.vdso,
		Addr:            vdsoAddr,
		Fixed:           true,
		Unmap:           true,
		Private:         true,
		Perms:           hostarch.Read,
		MaxPerms:        hostarch.AnyAccess,
	})
	if err != nil {
		ctx.Infof("Unable to map VDSO: %v", err)
		return 0, err
	}

	vdsoEnd, ok := vdsoAddr.AddLength(v.vdso.Length())
	if !ok {
		panic(fmt.Sprintf("VDSO mapping overflows? %#x + %#x", vdsoAddr, v.vdso.Length()))
	}

	// Set additional protections for the individual segments.
	var first *elf.ProgHeader
	for i, phdr := range v.phdrs {
		if phdr.Type != elf.PT_LOAD {
			continue
		}

		if first == nil {
			first = &v.phdrs[i]
		}

		memoryOffset := phdr.Vaddr - first.Vaddr
		segAddr, ok := vdsoAddr.AddLength(memoryOffset)
		if !ok {
			ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", vdsoAddr, memoryOffset)
			return 0, syserror.ENOEXEC
		}
		segPage := segAddr.RoundDown()
		segSize := hostarch.Addr(phdr.Memsz)
		segSize, ok = segSize.AddLength(segAddr.PageOffset())
		if !ok {
			ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset())
			return 0, syserror.ENOEXEC
		}
		segSize, ok = segSize.RoundUp()
		if !ok {
			ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset())
			return 0, syserror.ENOEXEC
		}
		segEnd, ok := segPage.AddLength(uint64(segSize))
		if !ok {
			ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segAddr, segSize)
			return 0, syserror.ENOEXEC
		}
		if segEnd > vdsoEnd {
			ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd)
			return 0, syserror.ENOEXEC
		}

		perms := progFlagsAsPerms(phdr.Flags)
		if perms != hostarch.Read {
			if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil {
				ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err)
				return 0, syserror.ENOEXEC
			}
		}
	}

	return vdsoAddr, nil
}

// Release drops references on mappings held by v.
func (v *VDSO) Release(ctx context.Context) {
	v.ParamPage.DecRef(ctx)
	v.vdso.DecRef(ctx)
}
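// setupVDSO is a hypothetical wiring example; the name and signature are
// illustrative and not part of this package. It shows the intended call
// order of the pieces above: PrepareVDSO validates and materializes the
// VDSO and its param page once, and loadVDSO then maps both into a task's
// address space. The caller owns the returned VDSO and must eventually call
// Release on it.
func setupVDSO(ctx context.Context, mfp pgalloc.MemoryFileProvider, m *mm.MemoryManager, bin loadedELF) (*VDSO, hostarch.Addr, error) {
	v, err := PrepareVDSO(mfp)
	if err != nil {
		return nil, 0, err
	}
	vdsoAddr, err := loadVDSO(ctx, m, v, bin)
	if err != nil {
		// Drop the references taken in PrepareVDSO.
		v.Release(ctx)
		return nil, 0, err
	}
	return v, vdsoAddr, nil
}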