github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/platform/kvm/physical_map.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kvm
    16  
    17  import (
    18  	"fmt"
    19  	"sort"
    20  
    21  	"golang.org/x/sys/unix"
    22  	"github.com/metacubex/gvisor/pkg/hostarch"
    23  	"github.com/metacubex/gvisor/pkg/log"
    24  	"github.com/metacubex/gvisor/pkg/ring0"
    25  )
    26  
// region describes a contiguous range of host virtual address space.
type region struct {
	virtual uintptr // Starting host virtual address.
	length  uintptr // Length of the range in bytes.
}
    31  
// physicalRegion is a host virtual region paired with the guest physical
// offset it is mapped at.
type physicalRegion struct {
	region
	physical uintptr // Guest physical base address for this region.
	// readOnly marks the region as mapped read-only in the guest; it is not
	// set in this file — presumably assigned by archPhysicalRegions (TODO
	// confirm against the arch-specific implementations).
	readOnly bool
}
    37  
// physicalRegions contains a list of available physical regions.
//
// The physical value used in physicalRegions is a number indicating the
// physical offset, aligned appropriately and starting above reservedMemory.
//
// It is populated once by physicalInit and read-only afterwards (e.g. by
// translateToPhysical, which is nosplit and must not race with writers).
var physicalRegions []physicalRegion
    43  
// fillAddressSpace fills the host address space with PROT_NONE mappings until
// we have a host address space size that is less than or equal to the physical
// address space. This allows us to have an injective host virtual to guest
// physical mapping.
//
// The excluded regions are returned.
func fillAddressSpace() (excludedRegions []region) {
	// We can cut vSize in half, because the kernel will be using the top
	// half and we ignore it while constructing mappings. It's as if we've
	// already excluded half the possible addresses.
	vSize := ring0.UserspaceSize

	// We exclude reservedMemory below from our physical memory size, so it
	// needs to be dropped here as well. Otherwise, we could end up with
	// physical addresses that are beyond what is mapped.
	pSize := uintptr(1) << ring0.PhysicalAddressBits
	pSize -= reservedMemory

	// Add specifically excluded regions; see excludeVirtualRegion.
	if err := applyVirtualRegions(func(vr virtualRegion) {
		if excludeVirtualRegion(vr) {
			excludedRegions = append(excludedRegions, vr.region)
			vSize -= vr.length
			log.Infof("excluded: virtual [%x,%x)", vr.virtual, vr.virtual+vr.length)
		}
	}); err != nil {
		panic(fmt.Sprintf("error parsing /proc/self/maps: %v", err))
	}

	// Do we need any more work?
	if vSize < pSize {
		return excludedRegions
	}

	// Calculate the required space and fill it.
	//
	// Note carefully that we add faultBlockSize to required up front, and
	// on each iteration of the loop below (i.e. each new physical region
	// we define), we add faultBlockSize again. This is done because the
	// computation of physical regions will ensure proper alignments with
	// faultBlockSize, potentially causing up to faultBlockSize bytes in
	// internal fragmentation for each physical region. So we need to
	// account for this properly during allocation.
	requiredAddr, ok := hostarch.Addr(vSize - pSize + faultBlockSize).RoundUp()
	if !ok {
		panic(fmt.Sprintf(
			"overflow for vSize (%x) - pSize (%x) + faultBlockSize (%x)",
			vSize, pSize, faultBlockSize))
	}
	required := uintptr(requiredAddr)
	current := required // Attempted mmap size.
	for filled := uintptr(0); filled < required && current > 0; {
		// Reserve (but do not commit) address space: PROT_NONE plus
		// MAP_NORESERVE means no physical memory is consumed.
		addr, _, errno := unix.RawSyscall6(
			unix.SYS_MMAP,
			0, // Suggested address.
			current,
			unix.PROT_NONE,
			unix.MAP_ANONYMOUS|unix.MAP_PRIVATE|unix.MAP_NORESERVE,
			0, 0)
		if errno != 0 {
			// One page is the smallest mapping that can be allocated.
			if current == hostarch.PageSize {
				current = 0
				break
			}
			// Attempt half the size; overflow not possible.
			currentAddr, _ := hostarch.Addr(current >> 1).RoundUp()
			current = uintptr(currentAddr)
			continue
		}
		// We filled a block.
		filled += current
		// Check whether a new region is merged with a previous one.
		// If so, addr is zeroed as a flag that no new entry is needed.
		for i := range excludedRegions {
			// New block sits immediately before an existing region.
			if excludedRegions[i].virtual == addr+current {
				excludedRegions[i].virtual = addr
				excludedRegions[i].length += current
				addr = 0
				break
			}
			// New block sits immediately after an existing region.
			if excludedRegions[i].virtual+excludedRegions[i].length == addr {
				excludedRegions[i].length += current
				addr = 0
				break
			}
		}
		if addr != 0 {
			// Not mergeable: record a brand-new excluded region. Each
			// new region may cost up to faultBlockSize of internal
			// fragmentation, so grow the target accordingly (see the
			// comment above requiredAddr).
			excludedRegions = append(excludedRegions, region{
				virtual: addr,
				length:  current,
			})
			// See comment above.
			if filled != required {
				required += faultBlockSize
			}
		}
	}
	if current == 0 {
		panic("filling address space failed")
	}
	// Sort by virtual address so later passes can walk the gaps in order.
	sort.Slice(excludedRegions, func(i, j int) bool {
		return excludedRegions[i].virtual < excludedRegions[j].virtual
	})
	for _, r := range excludedRegions {
		log.Infof("region: virtual [%x,%x)", r.virtual, r.virtual+r.length)
	}
	return excludedRegions
}
   152  
// computePhysicalRegions computes physical regions from the gaps between the
// given excluded regions, assigning each a guest physical offset starting at
// reservedMemory.
//
// Precondition: excludedRegions must be sorted by virtual address and
// non-overlapping (fillAddressSpace returns them sorted).
func computePhysicalRegions(excludedRegions []region) (physicalRegions []physicalRegion) {
	physical := uintptr(reservedMemory)
	// addValidRegion appends [virtual, virtual+length) as a physical region,
	// clamping it to (0, MaximumUserAddress) and advancing the running
	// physical offset.
	addValidRegion := func(virtual, length uintptr) {
		if length == 0 {
			return
		}
		// Never map the zero page; skip past it.
		if virtual == 0 {
			virtual += hostarch.PageSize
			length -= hostarch.PageSize
		}
		// Clamp to the maximum user address.
		if end := virtual + length; end > ring0.MaximumUserAddress {
			length -= (end - ring0.MaximumUserAddress)
		}
		if length == 0 {
			return
		}
		// Round physical up to the same alignment as the virtual
		// address (with respect to faultBlockSize).
		//
		// NOTE(review): this assumes faultBlockMask selects the high
		// (block-aligned) bits, so `x &^ faultBlockMask` is the offset
		// within a fault block and `x & faultBlockMask` is the aligned
		// base — confirm against faultBlockMask's definition.
		if offset := virtual &^ faultBlockMask; physical&^faultBlockMask != offset {
			if newPhysical := (physical & faultBlockMask) + offset; newPhysical > physical {
				physical = newPhysical // Round up by only a little bit.
			} else {
				physical = ((physical + faultBlockSize) & faultBlockMask) + offset
			}
		}
		physicalRegions = append(physicalRegions, physicalRegion{
			region: region{
				virtual: virtual,
				length:  length,
			},
			physical: physical,
		})
		physical += length
	}
	// Walk the gaps between consecutive excluded regions.
	lastExcludedEnd := uintptr(0)
	for _, r := range excludedRegions {
		addValidRegion(lastExcludedEnd, r.virtual-lastExcludedEnd)
		lastExcludedEnd = r.virtual + r.length
	}
	// Tail gap: from the last exclusion to the top of user space.
	addValidRegion(lastExcludedEnd, ring0.MaximumUserAddress-lastExcludedEnd)

	// Do arch-specific actions on physical regions.
	physicalRegions = archPhysicalRegions(physicalRegions)

	// Dump our all physical regions.
	for _, r := range physicalRegions {
		log.Infof("physicalRegion: virtual [%x,%x) => physical [%x,%x)",
			r.virtual, r.virtual+r.length, r.physical, r.physical+r.length)
	}
	return physicalRegions
}
   205  
   206  // physicalInit initializes physical address mappings.
   207  func physicalInit() {
   208  	physicalRegions = computePhysicalRegions(fillAddressSpace())
   209  }
   210  
   211  // applyPhysicalRegions applies the given function on physical regions.
   212  //
   213  // Iteration continues as long as true is returned. The return value is the
   214  // return from the last call to fn, or true if there are no entries.
   215  //
   216  // Precondition: physicalInit must have been called.
   217  func applyPhysicalRegions(fn func(pr physicalRegion) bool) bool {
   218  	for _, pr := range physicalRegions {
   219  		if !fn(pr) {
   220  			return false
   221  		}
   222  	}
   223  	return true
   224  }
   225  
   226  // translateToPhysical translates the given virtual address.
   227  //
   228  // Precondition: physicalInit must have been called.
   229  //
   230  //go:nosplit
   231  func translateToPhysical(virtual uintptr) (physical uintptr, length uintptr, ok bool) {
   232  	for _, pr := range physicalRegions {
   233  		if pr.virtual <= virtual && virtual < pr.virtual+pr.length {
   234  			physical = pr.physical + (virtual - pr.virtual)
   235  			length = pr.length - (virtual - pr.virtual)
   236  			ok = true
   237  			return
   238  		}
   239  	}
   240  	return
   241  }