// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fsutil

import (
	"fmt"

	"github.com/MerlinKodo/gvisor/pkg/hostarch"
	"github.com/MerlinKodo/gvisor/pkg/log"
	"github.com/MerlinKodo/gvisor/pkg/safemem"
	"github.com/MerlinKodo/gvisor/pkg/sentry/memmap"
	"golang.org/x/sys/unix"
)

// HostFileMapper caches mappings of an arbitrary host file descriptor. It is
// used by implementations of memmap.Mappable that represent a host file
// descriptor.
//
// Lock ordering: refsMu may be held while acquiring mapsMu (see DecRefOn);
// the reverse order must not be used.
//
// +stateify savable
type HostFileMapper struct {
	// HostFile conceptually breaks the file into pieces called chunks, of
	// size and alignment chunkSize, and caches mappings of the file on a chunk
	// granularity.

	refsMu refsMutex `state:"nosave"`

	// refs maps chunk start offsets to the sum of reference counts for all
	// pages in that chunk. refs is protected by refsMu.
	refs map[uint64]int32

	mapsMu mapsMutex `state:"nosave"`

	// mappings maps chunk start offsets to mappings of those chunks,
	// obtained by calling unix.Mmap. mappings is protected by
	// mapsMu.
	mappings map[uint64]mapping `state:"nosave"`
}

// Chunks are huge-page-sized and huge-page-aligned spans of file offsets.
const (
	chunkShift = hostarch.HugePageShift
	chunkSize  = 1 << chunkShift
	chunkMask  = chunkSize - 1
)

// pagesInChunk returns the number of pages in mr that fall within the chunk
// whose first offset is chunkStart.
func pagesInChunk(mr memmap.MappableRange, chunkStart uint64) int32 {
	return int32(mr.Intersect(memmap.MappableRange{chunkStart, chunkStart + chunkSize}).Length() / hostarch.PageSize)
}

// mapping records a cached host mapping of a single chunk.
type mapping struct {
	// addr is the start address of the chunk's mapping in this process'
	// address space.
	addr uintptr

	// writable is true if the mapping was established with PROT_WRITE.
	writable bool
}

// Init must be called on zero-value HostFileMappers before first use.
func (f *HostFileMapper) Init() {
	f.refs = make(map[uint64]int32)
	f.mappings = make(map[uint64]mapping)
}

// IsInited returns true if f.Init() has been called. This is used when
// restoring a checkpoint that contains a HostFileMapper that may or may not
// have been initialized.
func (f *HostFileMapper) IsInited() bool {
	return f.refs != nil
}

// NewHostFileMapper returns an initialized HostFileMapper allocated on the
// heap with no references or cached mappings.
func NewHostFileMapper() *HostFileMapper {
	f := &HostFileMapper{}
	f.Init()
	return f
}

// IncRefOn increments the reference count on all offsets in mr.
//
// Preconditions:
//   - mr.Length() != 0.
//   - mr.Start and mr.End must be page-aligned.
func (f *HostFileMapper) IncRefOn(mr memmap.MappableRange) {
	f.refsMu.Lock()
	defer f.refsMu.Unlock()
	// Round mr.Start down to the start of its containing chunk.
	chunkStart := mr.Start &^ chunkMask
	for {
		refs := f.refs[chunkStart]
		pgs := pagesInChunk(mr, chunkStart)
		if refs+pgs < refs {
			// Would overflow.
			panic(fmt.Sprintf("HostFileMapper.IncRefOn(%v): adding %d page references to chunk %#x, which has %d page references", mr, pgs, chunkStart, refs))
		}
		f.refs[chunkStart] = refs + pgs
		chunkStart += chunkSize
		// chunkStart == 0 detects wraparound of the 64-bit offset space.
		if chunkStart >= mr.End || chunkStart == 0 {
			break
		}
	}
}

// DecRefOn decrements the reference count on all offsets in mr. When a
// chunk's reference count reaches zero, its cached host mapping (if any) is
// unmapped.
//
// Preconditions:
//   - mr.Length() != 0.
//   - mr.Start and mr.End must be page-aligned.
func (f *HostFileMapper) DecRefOn(mr memmap.MappableRange) {
	f.refsMu.Lock()
	defer f.refsMu.Unlock()
	// Round mr.Start down to the start of its containing chunk.
	chunkStart := mr.Start &^ chunkMask
	for {
		refs := f.refs[chunkStart]
		pgs := pagesInChunk(mr, chunkStart)
		switch {
		case refs > pgs:
			f.refs[chunkStart] = refs - pgs
		case refs == pgs:
			// Last references on this chunk: drop the refcount entry and
			// discard any cached mapping. Note that mapsMu is acquired
			// while refsMu is held (lock order: refsMu -> mapsMu).
			f.mapsMu.Lock()
			delete(f.refs, chunkStart)
			if m, ok := f.mappings[chunkStart]; ok {
				f.unmapAndRemoveLocked(chunkStart, m)
			}
			f.mapsMu.Unlock()
		case refs < pgs:
			panic(fmt.Sprintf("HostFileMapper.DecRefOn(%v): removing %d page references from chunk %#x, which has %d page references", mr, pgs, chunkStart, refs))
		}
		chunkStart += chunkSize
		// chunkStart == 0 detects wraparound of the 64-bit offset space.
		if chunkStart >= mr.End || chunkStart == 0 {
			break
		}
	}
}

// MapInternal returns a mapping of offsets in fr from fd. The returned
// safemem.BlockSeq is valid as long as at least one reference is held on all
// offsets in fr or until the next call to UnmapAll.
//
// Preconditions: The caller must hold a reference on all offsets in fr.
func (f *HostFileMapper) MapInternal(fr memmap.FileRange, fd int, write bool) (safemem.BlockSeq, error) {
	// chunks is the number of distinct chunks spanned by fr.
	chunks := ((fr.End + chunkMask) >> chunkShift) - (fr.Start >> chunkShift)
	f.mapsMu.Lock()
	defer f.mapsMu.Unlock()
	if chunks == 1 {
		// Avoid an unnecessary slice allocation.
		var seq safemem.BlockSeq
		err := f.forEachMappingBlockLocked(fr, fd, write, func(b safemem.Block) {
			seq = safemem.BlockSeqOf(b)
		})
		return seq, err
	}
	blocks := make([]safemem.Block, 0, chunks)
	err := f.forEachMappingBlockLocked(fr, fd, write, func(b safemem.Block) {
		blocks = append(blocks, b)
	})
	return safemem.BlockSeqFromSlice(blocks), err
}

// Preconditions: f.mapsMu must be locked.
// forEachMappingBlockLocked calls fn once per chunk overlapping fr, passing a
// block describing the portion of that chunk's host mapping that lies within
// fr. Missing chunk mappings are created on demand; cached read-only mappings
// are upgraded in place if write is true.
func (f *HostFileMapper) forEachMappingBlockLocked(fr memmap.FileRange, fd int, write bool, fn func(safemem.Block)) error {
	prot := unix.PROT_READ
	if write {
		prot |= unix.PROT_WRITE
	}
	// Round fr.Start down to the start of its containing chunk.
	chunkStart := fr.Start &^ chunkMask
	for {
		m, ok := f.mappings[chunkStart]
		if !ok {
			// No cached mapping for this chunk; create one.
			addr, _, errno := unix.Syscall6(
				unix.SYS_MMAP,
				0,
				chunkSize,
				uintptr(prot),
				unix.MAP_SHARED,
				uintptr(fd),
				uintptr(chunkStart))
			if errno != 0 {
				return errno
			}
			m = mapping{addr, write}
			f.mappings[chunkStart] = m
		} else if write && !m.writable {
			// The cached mapping is read-only but the caller needs write
			// access. Remap the chunk at the same address (MAP_FIXED at
			// m.addr) with PROT_WRITE added, so blocks returned from the
			// old mapping remain valid.
			addr, _, errno := unix.Syscall6(
				unix.SYS_MMAP,
				m.addr,
				chunkSize,
				uintptr(prot),
				unix.MAP_SHARED|unix.MAP_FIXED,
				uintptr(fd),
				uintptr(chunkStart))
			if errno != 0 {
				return errno
			}
			m = mapping{addr, write}
			f.mappings[chunkStart] = m
		}
		// Trim the chunk's mapping to the overlap with fr: startOff skips
		// bytes before fr.Start, endOff excludes bytes at or past fr.End.
		var startOff uint64
		if chunkStart < fr.Start {
			startOff = fr.Start - chunkStart
		}
		endOff := uint64(chunkSize)
		if chunkStart+chunkSize > fr.End {
			endOff = fr.End - chunkStart
		}
		fn(f.unsafeBlockFromChunkMapping(m.addr).TakeFirst64(endOff).DropFirst64(startOff))
		chunkStart += chunkSize
		// chunkStart == 0 detects wraparound of the 64-bit offset space.
		if chunkStart >= fr.End || chunkStart == 0 {
			break
		}
	}
	return nil
}

// UnmapAll unmaps all cached mappings. Callers are responsible for
// synchronization with mappings returned by previous calls to MapInternal.
func (f *HostFileMapper) UnmapAll() {
	f.mapsMu.Lock()
	defer f.mapsMu.Unlock()
	for chunkStart, m := range f.mappings {
		f.unmapAndRemoveLocked(chunkStart, m)
	}
}

// unmapAndRemoveLocked munmaps the chunk mapping m and removes its entry from
// f.mappings. A failed munmap is logged rather than fatal: the entry is
// removed regardless, leaking the address range.
//
// Preconditions:
//   - f.mapsMu must be locked.
//   - f.mappings[chunkStart] == m.
func (f *HostFileMapper) unmapAndRemoveLocked(chunkStart uint64, m mapping) {
	if _, _, errno := unix.Syscall(unix.SYS_MUNMAP, m.addr, chunkSize, 0); errno != 0 {
		// This leaks address space and is unexpected, but is otherwise
		// harmless, so complain but don't panic.
		log.Warningf("HostFileMapper: failed to unmap mapping %#x for chunk %#x: %v", m.addr, chunkStart, errno)
	}
	delete(f.mappings, chunkStart)
}

// RegenerateMappings must be called when the file description mapped by f
// changes, to replace existing mappings of the previous file description.
func (f *HostFileMapper) RegenerateMappings(fd int) error {
	f.mapsMu.Lock()
	defer f.mapsMu.Unlock()

	for chunkStart, m := range f.mappings {
		// Recreate each cached mapping in place (MAP_FIXED at its existing
		// address, preserving its protection) so that it refers to the new
		// file description. Blocks previously returned by MapInternal keep
		// the same addresses.
		prot := unix.PROT_READ
		if m.writable {
			prot |= unix.PROT_WRITE
		}
		_, _, errno := unix.Syscall6(
			unix.SYS_MMAP,
			m.addr,
			chunkSize,
			uintptr(prot),
			unix.MAP_SHARED|unix.MAP_FIXED,
			uintptr(fd),
			uintptr(chunkStart))
		if errno != 0 {
			return errno
		}
	}
	return nil
}