github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/platform/kvm/address_space.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kvm

import (
	"github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops"
	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
	"github.com/nicocha30/gvisor-ligolo/pkg/ring0/pagetables"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/memmap"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/platform"
	"github.com/nicocha30/gvisor-ligolo/pkg/sync"
)

// dirtySet tracks vCPUs for invalidation.
type dirtySet struct {
	vCPUMasks []atomicbitops.Uint64
}

// forEach iterates over all CPUs in the dirty set.
//
//go:nosplit
func (ds *dirtySet) forEach(m *machine, fn func(c *vCPU)) {
	for index := range ds.vCPUMasks {
		mask := ds.vCPUMasks[index].Swap(0)
		if mask != 0 {
			for bit := 0; bit < 64; bit++ {
				if mask&(1<<uint64(bit)) == 0 {
					continue
				}
				id := 64*index + bit
				fn(m.vCPUsByID[id])
			}
		}
	}
}

// mark marks the given vCPU as dirty and returns whether it was previously
// clean. Being previously clean implies that a flush is needed on entry.
func (ds *dirtySet) mark(c *vCPU) bool {
	index := uint64(c.id) / 64
	bit := uint64(1) << uint(c.id%64)

	oldValue := ds.vCPUMasks[index].Load()
	if oldValue&bit != 0 {
		return false // Not clean.
	}

	// Set the bit unilaterally, and ensure that a flush takes place. Note
	// that it's possible for races to occur here, but since the flush is
	// taking place long after these lines there's no race in practice.
	atomicbitops.OrUint64(&ds.vCPUMasks[index], bit)
	return true // Previously clean.
}

// addressSpace is a wrapper for PageTables.
type addressSpace struct {
	platform.NoAddressSpaceIO

	// mu is the lock for modifications to the address space.
	//
	// Note that the page tables themselves are not locked.
	mu sync.Mutex

	// machine is the underlying machine.
	machine *machine

	// pageTables are for this particular address space.
	pageTables *pagetables.PageTables

	// dirtySet is the set of dirty vCPUs.
	dirtySet *dirtySet
}

// Invalidate interrupts all dirty contexts.
func (as *addressSpace) Invalidate() {
	as.mu.Lock()
	defer as.mu.Unlock()
	as.invalidate()
}

// Touch adds the given vCPU to the dirty list.
//
// The return value indicates whether a flush is required.
func (as *addressSpace) Touch(c *vCPU) bool {
	return as.dirtySet.mark(c)
}
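// The dirty-set protocol above is intentionally one-sided: Touch (via
// dirtySet.mark) records that a vCPU has run on these page tables, and
// invalidate (defined elsewhere in this package, via dirtySet.forEach)
// atomically drains the recorded set and calls back for each such vCPU.
// A rough sketch of how the two halves meet, under the assumption that
// invalidation only needs to interrupt vCPUs that are currently active
// on this address space (illustrative only, not part of this file's API):
//
//	if as.Touch(c) {
//		// First run since the last invalidation: the vCPU must
//		// flush its TLB on entry.
//	}
//	...
//	as.dirtySet.forEach(as.machine, func(c *vCPU) {
//		c.BounceToKernel() // Force a kernel transition, if active.
//	})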
type hostMapEntry struct {
	addr   uintptr
	length uintptr
}

// mapLocked maps the given host entry.
//
// +checkescape:hard,stack
//
//go:nosplit
func (as *addressSpace) mapLocked(addr hostarch.Addr, m hostMapEntry, at hostarch.AccessType) (inv bool) {
	for m.length > 0 {
		physical, length, ok := translateToPhysical(m.addr)
		if !ok {
			panic("unable to translate segment")
		}
		if length > m.length {
			length = m.length
		}

		// Ensure that this map has physical mappings. If the page does
		// not have physical mappings, the KVM module may inject
		// spurious exceptions when emulation fails (i.e. it tries to
		// emulate because the RIP is pointed at those pages).
		as.machine.mapPhysical(physical, length, physicalRegions)

		// Install the page table mappings. Note that the ordering is
		// important; if the pagetable mappings were installed before
		// ensuring the physical pages were available, then some other
		// thread could theoretically access them.
		inv = as.pageTables.Map(addr, length, pagetables.MapOpts{
			AccessType: at,
			User:       true,
		}, physical) || inv
		m.addr += length
		m.length -= length
		addr += hostarch.Addr(length)
	}

	return inv
}

// MapFile implements platform.AddressSpace.MapFile.
func (as *addressSpace) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error {
	as.mu.Lock()
	defer as.mu.Unlock()

	// Get mappings in the sentry's address space, which are guaranteed to be
	// valid as long as a reference is held on the mapped pages (which is in
	// turn required by AddressSpace.MapFile precondition).
	//
	// If precommit is true, we will touch mappings to commit them, so ensure
	// that mappings are readable from sentry context.
	//
	// We don't execute from application file-mapped memory, and guest page
	// tables don't care if we have execute permission (but they do need pages
	// to be readable).
	bs, err := f.MapInternal(fr, hostarch.AccessType{
		Read:  at.Read || at.Execute || precommit,
		Write: at.Write,
	})
	if err != nil {
		return err
	}

	// See block in mapLocked.
	as.pageTables.Allocator.(*allocator).cpu = as.machine.Get()
	defer as.machine.Put(as.pageTables.Allocator.(*allocator).cpu)

	// Map the mappings in the sentry's address space (guest physical memory)
	// into the application's address space (guest virtual memory).
	inv := false
	for !bs.IsEmpty() {
		b := bs.Head()
		bs = bs.Tail()
		// Since fr was page-aligned, b should also be page-aligned. We do the
		// lookup in our host page tables for this translation.
		if precommit {
			s := b.ToSlice()
			for i := 0; i < len(s); i += hostarch.PageSize {
				_ = s[i] // Touch to commit.
			}
		}

		// See bluepill_allocator.go.
		bluepill(as.pageTables.Allocator.(*allocator).cpu)

		// Perform the mapping.
		prev := as.mapLocked(addr, hostMapEntry{
			addr:   b.Addr(),
			length: uintptr(b.Len()),
		}, at)
		inv = inv || prev
		addr += hostarch.Addr(b.Len())
	}
	if inv {
		as.invalidate()
	}

	return nil
}

// unmapLocked is an escape-checked wrapper around Unmap.
//
// +checkescape:hard,stack
//
//go:nosplit
func (as *addressSpace) unmapLocked(addr hostarch.Addr, length uint64) bool {
	return as.pageTables.Unmap(addr, uintptr(length))
}
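// MapFile above and Unmap below share the same choreography around the
// nosplit, escape-checked helpers: pin a vCPU, lend it to the page table
// allocator, enter guest mode via bluepill, and only then walk the page
// tables. A condensed sketch of that pattern (illustrative only; see
// bluepill_allocator.go for why the allocator needs a vCPU):
//
//	cpu := as.machine.Get()
//	as.pageTables.Allocator.(*allocator).cpu = cpu
//	defer as.machine.Put(cpu)
//	bluepill(cpu)                         // Switch to guest mode for the nosplit section.
//	dirty := as.unmapLocked(addr, length) // Or mapLocked, when mapping.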
// Unmap unmaps the given range by calling pagetables.PageTables.Unmap.
func (as *addressSpace) Unmap(addr hostarch.Addr, length uint64) {
	as.mu.Lock()
	defer as.mu.Unlock()

	// See above & bluepill_allocator.go.
	as.pageTables.Allocator.(*allocator).cpu = as.machine.Get()
	defer as.machine.Put(as.pageTables.Allocator.(*allocator).cpu)
	bluepill(as.pageTables.Allocator.(*allocator).cpu)

	if prev := as.unmapLocked(addr, length); prev {
		// Invalidate all active vCPUs.
		as.invalidate()

		// Recycle any freed intermediate pages.
		as.pageTables.Allocator.Recycle()
	}
}

// Release releases the page tables.
func (as *addressSpace) Release() {
	as.Unmap(0, ^uint64(0))

	// Free all pages from the allocator.
	as.pageTables.Allocator.(*allocator).base.Drain()

	// Drop all cached machine references.
	as.machine.dropPageTables(as.pageTables)
}

// PreFork implements platform.AddressSpace.PreFork.
func (as *addressSpace) PreFork() {}

// PostFork implements platform.AddressSpace.PostFork.
func (as *addressSpace) PostFork() {}
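// From a caller's perspective, an addressSpace is obtained from the KVM
// platform and driven through the generic platform.AddressSpace interface.
// A minimal usage sketch, assuming k is a *KVM instance and f/fr describe a
// pinned range of the sentry's memory file (k, f, and fr are assumptions
// for illustration, not defined in this file):
//
//	as, _, err := k.NewAddressSpace(nil)
//	if err != nil {
//		return err
//	}
//	defer as.Release()
//
//	// Map the range read/write at a guest virtual address.
//	if err := as.MapFile(0x400000, f, fr, hostarch.ReadWrite, false /* precommit */); err != nil {
//		return err
//	}
//
//	// Tear it down again; any dirty vCPUs are invalidated as needed.
//	as.Unmap(0x400000, fr.Length())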