github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/platform/kvm/physical_map.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kvm

import (
	"fmt"
	"sort"

	"golang.org/x/sys/unix"
	"github.com/metacubex/gvisor/pkg/hostarch"
	"github.com/metacubex/gvisor/pkg/log"
	"github.com/metacubex/gvisor/pkg/ring0"
)

type region struct {
	virtual uintptr
	length  uintptr
}

type physicalRegion struct {
	region
	physical uintptr
	readOnly bool
}

// physicalRegions contains a list of available physical regions.
//
// The physical value used in physicalRegions is a number indicating the
// physical offset, aligned appropriately and starting above reservedMemory.
var physicalRegions []physicalRegion

// fillAddressSpace fills the host address space with PROT_NONE mappings until
// we have a host address space size that is less than or equal to the physical
// address space. This allows us to have an injective host virtual to guest
// physical mapping.
//
// The excluded regions are returned.
func fillAddressSpace() (excludedRegions []region) {
	// We can cut vSize in half, because the kernel will be using the top
	// half and we ignore it while constructing mappings. It's as if we've
	// already excluded half the possible addresses.
	vSize := ring0.UserspaceSize

	// We exclude reservedMemory below from our physical memory size, so it
	// needs to be dropped here as well. Otherwise, we could end up with
	// physical addresses that are beyond what is mapped.
	pSize := uintptr(1) << ring0.PhysicalAddressBits
	pSize -= reservedMemory

	// Add specifically excluded regions; see excludeVirtualRegion.
	if err := applyVirtualRegions(func(vr virtualRegion) {
		if excludeVirtualRegion(vr) {
			excludedRegions = append(excludedRegions, vr.region)
			vSize -= vr.length
			log.Infof("excluded: virtual [%x,%x)", vr.virtual, vr.virtual+vr.length)
		}
	}); err != nil {
		panic(fmt.Sprintf("error parsing /proc/self/maps: %v", err))
	}

	// Do we need any more work?
	if vSize < pSize {
		return excludedRegions
	}

	// Calculate the required space and fill it.
	//
	// Note carefully that we add faultBlockSize to required up front, and
	// on each iteration of the loop below (i.e. each new physical region
	// we define), we add faultBlockSize again. This is done because the
	// computation of physical regions will ensure proper alignments with
	// faultBlockSize, potentially causing up to faultBlockSize bytes in
	// internal fragmentation for each physical region. So we need to
	// account for this properly during allocation.
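	//
	// In other words, each new excluded mapping created below can split a
	// contiguous valid range in two, yielding one more physical region in
	// computePhysicalRegions and hence up to faultBlockSize of additional
	// alignment padding; growing required by faultBlockSize per appended
	// region accounts for that.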
	requiredAddr, ok := hostarch.Addr(vSize - pSize + faultBlockSize).RoundUp()
	if !ok {
		panic(fmt.Sprintf(
			"overflow for vSize (%x) - pSize (%x) + faultBlockSize (%x)",
			vSize, pSize, faultBlockSize))
	}
	required := uintptr(requiredAddr)
	current := required // Attempted mmap size.
	for filled := uintptr(0); filled < required && current > 0; {
		addr, _, errno := unix.RawSyscall6(
			unix.SYS_MMAP,
			0, // Suggested address.
			current,
			unix.PROT_NONE,
			unix.MAP_ANONYMOUS|unix.MAP_PRIVATE|unix.MAP_NORESERVE,
			0, 0)
		if errno != 0 {
			// One page is the smallest mapping that can be allocated.
			if current == hostarch.PageSize {
				current = 0
				break
			}
			// Attempt half the size; overflow not possible.
			currentAddr, _ := hostarch.Addr(current >> 1).RoundUp()
			current = uintptr(currentAddr)
			continue
		}
		// We filled a block.
		filled += current
		// Check whether the new region can be merged with a previous one.
		for i := range excludedRegions {
			if excludedRegions[i].virtual == addr+current {
				excludedRegions[i].virtual = addr
				excludedRegions[i].length += current
				addr = 0
				break
			}
			if excludedRegions[i].virtual+excludedRegions[i].length == addr {
				excludedRegions[i].length += current
				addr = 0
				break
			}
		}
		if addr != 0 {
			excludedRegions = append(excludedRegions, region{
				virtual: addr,
				length:  current,
			})
			// See comment above.
			if filled != required {
				required += faultBlockSize
			}
		}
	}
	if current == 0 {
		panic("filling address space failed")
	}
	sort.Slice(excludedRegions, func(i, j int) bool {
		return excludedRegions[i].virtual < excludedRegions[j].virtual
	})
	for _, r := range excludedRegions {
		log.Infof("region: virtual [%x,%x)", r.virtual, r.virtual+r.length)
	}
	return excludedRegions
}

// computePhysicalRegions computes physical regions.
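//
// For example (the layout here is purely illustrative): given a single
// excluded region in the middle of the user address space, the function
// produces two valid regions, one on either side of it. Each valid region is
// assigned a contiguous range of physical offsets starting at reservedMemory,
// with the physical start adjusted so that it shares the virtual address's
// offset within faultBlockSize; the zero page and any portion above
// ring0.MaximumUserAddress are dropped.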
func computePhysicalRegions(excludedRegions []region) (physicalRegions []physicalRegion) {
	physical := uintptr(reservedMemory)
	addValidRegion := func(virtual, length uintptr) {
		if length == 0 {
			return
		}
		if virtual == 0 {
			virtual += hostarch.PageSize
			length -= hostarch.PageSize
		}
		if end := virtual + length; end > ring0.MaximumUserAddress {
			length -= (end - ring0.MaximumUserAddress)
		}
		if length == 0 {
			return
		}
		// Round physical up to the same alignment as the virtual
		// address (with respect to faultBlockSize).
		if offset := virtual &^ faultBlockMask; physical&^faultBlockMask != offset {
			if newPhysical := (physical & faultBlockMask) + offset; newPhysical > physical {
				physical = newPhysical // Round up by only a little bit.
			} else {
				physical = ((physical + faultBlockSize) & faultBlockMask) + offset
			}
		}
		physicalRegions = append(physicalRegions, physicalRegion{
			region: region{
				virtual: virtual,
				length:  length,
			},
			physical: physical,
		})
		physical += length
	}
	lastExcludedEnd := uintptr(0)
	for _, r := range excludedRegions {
		addValidRegion(lastExcludedEnd, r.virtual-lastExcludedEnd)
		lastExcludedEnd = r.virtual + r.length
	}
	addValidRegion(lastExcludedEnd, ring0.MaximumUserAddress-lastExcludedEnd)

	// Do arch-specific actions on physical regions.
	physicalRegions = archPhysicalRegions(physicalRegions)

	// Dump all physical regions.
	for _, r := range physicalRegions {
		log.Infof("physicalRegion: virtual [%x,%x) => physical [%x,%x)",
			r.virtual, r.virtual+r.length, r.physical, r.physical+r.length)
	}
	return physicalRegions
}

// physicalInit initializes physical address mappings.
func physicalInit() {
	physicalRegions = computePhysicalRegions(fillAddressSpace())
}

// applyPhysicalRegions applies the given function on physical regions.
//
// Iteration continues as long as true is returned. The return value is the
// return from the last call to fn, or true if there are no entries.
//
// Precondition: physicalInit must have been called.
func applyPhysicalRegions(fn func(pr physicalRegion) bool) bool {
	for _, pr := range physicalRegions {
		if !fn(pr) {
			return false
		}
	}
	return true
}

// translateToPhysical translates the given virtual address.
//
// Precondition: physicalInit must have been called.
//
//go:nosplit
func translateToPhysical(virtual uintptr) (physical uintptr, length uintptr, ok bool) {
	for _, pr := range physicalRegions {
		if pr.virtual <= virtual && virtual < pr.virtual+pr.length {
			physical = pr.physical + (virtual - pr.virtual)
			length = pr.length - (virtual - pr.virtual)
			ok = true
			return
		}
	}
	return
}
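// Illustrative translation arithmetic (the addresses below are made up): for
// a physicalRegion with virtual [0x7f0000000000, 0x7f0040000000) and physical
// [0x100000000, 0x140000000), translateToPhysical(0x7f0000001000) returns
// physical 0x100001000, length 0x3ffff000 and ok == true.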