gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/mm/procfs.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mm 16 17 import ( 18 "bytes" 19 "fmt" 20 21 "gvisor.dev/gvisor/pkg/context" 22 "gvisor.dev/gvisor/pkg/hostarch" 23 "gvisor.dev/gvisor/pkg/log" 24 "gvisor.dev/gvisor/pkg/sentry/memmap" 25 ) 26 27 const ( 28 // devMinorBits is the number of minor bits in a device number. Linux: 29 // include/linux/kdev_t.h:MINORBITS 30 devMinorBits = 20 31 32 vsyscallEnd = hostarch.Addr(0xffffffffff601000) 33 vsyscallMapsEntry = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" 34 vsyscallSmapsEntry = vsyscallMapsEntry + 35 "Size: 4 kB\n" + 36 "Rss: 0 kB\n" + 37 "Pss: 0 kB\n" + 38 "Shared_Clean: 0 kB\n" + 39 "Shared_Dirty: 0 kB\n" + 40 "Private_Clean: 0 kB\n" + 41 "Private_Dirty: 0 kB\n" + 42 "Referenced: 0 kB\n" + 43 "Anonymous: 0 kB\n" + 44 "AnonHugePages: 0 kB\n" + 45 "Shared_Hugetlb: 0 kB\n" + 46 "Private_Hugetlb: 0 kB\n" + 47 "Swap: 0 kB\n" + 48 "SwapPss: 0 kB\n" + 49 "KernelPageSize: 4 kB\n" + 50 "MMUPageSize: 4 kB\n" + 51 "Locked: 0 kB\n" + 52 "VmFlags: rd ex \n" 53 ) 54 55 // MapsCallbackFuncForBuffer creates a /proc/[pid]/maps entry including the trailing newline. 56 func (mm *MemoryManager) MapsCallbackFuncForBuffer(buf *bytes.Buffer) MapsCallbackFunc { 57 return func(start, end hostarch.Addr, permissions hostarch.AccessType, private string, offset uint64, devMajor, devMinor uint32, inode uint64, path string) { 58 // Do not include the guard page: fs/proc/task_mmu.c:show_map_vma() => 59 // stack_guard_page_start(). 60 lineLen, err := fmt.Fprintf(buf, "%08x-%08x %s%s %08x %02x:%02x %d ", 61 start, end, permissions, private, offset, devMajor, devMinor, inode) 62 if err != nil { 63 log.Warningf("Failed to write to buffer with error: %v", err) 64 return 65 } 66 67 if path != "" { 68 // Per linux, we pad until the 74th character. 69 for pad := 73 - lineLen; pad > 0; pad-- { 70 buf.WriteByte(' ') // never returns a non-nil error 71 } 72 buf.WriteString(path) // never returns a non-nil error 73 } 74 buf.WriteByte('\n') // never returns a non-nil error 75 } 76 } 77 78 // ReadMapsDataInto is called by fsimpl/proc.mapsData.Generate to 79 // implement /proc/[pid]/maps. 80 func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, fn MapsCallbackFunc) { 81 // FIXME(b/235153601): Need to replace RLockBypass with RLockBypass 82 // after fixing b/235153601. 83 mm.mappingMu.RLockBypass() 84 defer mm.mappingMu.RUnlockBypass() 85 var start hostarch.Addr 86 87 for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() { 88 mm.appendVMAMapsEntryLocked(ctx, vseg, fn) 89 } 90 91 // We always emulate vsyscall, so advertise it here. Everything about a 92 // vsyscall region is static, so just hard code the maps entry since we 93 // don't have a real vma backing it. The vsyscall region is at the end of 94 // the virtual address space so nothing should be mapped after it (if 95 // something is really mapped in the tiny ~10 MiB segment afterwards, we'll 96 // get the sorting on the maps file wrong at worst; but that's not possible 97 // on any current platform). 98 // 99 // Artificially adjust the seqfile handle so we only output vsyscall entry once. 100 if start != vsyscallEnd { 101 fn(hostarch.Addr(0xffffffffff600000), hostarch.Addr(0xffffffffff601000), hostarch.ReadExecute, "p", 0, 0, 0, 0, "[vsyscall]") 102 } 103 } 104 105 // vmaMapsEntryLocked returns a /proc/[pid]/maps entry for the vma iterated by 106 // vseg, including the trailing newline. 107 // 108 // Preconditions: mm.mappingMu must be locked. 109 func (mm *MemoryManager) vmaMapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte { 110 var b bytes.Buffer 111 mm.appendVMAMapsEntryLocked(ctx, vseg, mm.MapsCallbackFuncForBuffer(&b)) 112 return b.Bytes() 113 } 114 115 // Preconditions: mm.mappingMu must be locked. 116 func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaIterator, fn MapsCallbackFunc) { 117 vma := vseg.ValuePtr() 118 private := "p" 119 if !vma.private { 120 private = "s" 121 } 122 123 var dev, ino uint64 124 if vma.id != nil { 125 dev = vma.id.DeviceID() 126 ino = vma.id.InodeID() 127 } 128 devMajor := uint32(dev >> devMinorBits) 129 devMinor := uint32(dev & ((1 << devMinorBits) - 1)) 130 131 // Figure out our filename or hint. 132 var path string 133 if vma.hint != "" { 134 path = vma.hint 135 } else if vma.id != nil { 136 // FIXME(jamieliu): We are holding mm.mappingMu here, which is 137 // consistent with Linux's holding mmap_sem in 138 // fs/proc/task_mmu.c:show_map_vma() => fs/seq_file.c:seq_file_path(). 139 // However, it's not clear that fs.File.MappedName() is actually 140 // consistent with this lock order. 141 path = vma.id.MappedName(ctx) 142 } 143 fn(vseg.Start(), vseg.End(), vma.realPerms, private, vma.off, devMajor, devMinor, ino, path) 144 } 145 146 // ReadSmapsDataInto is called by fsimpl/proc.smapsData.Generate to 147 // implement /proc/[pid]/maps. 148 func (mm *MemoryManager) ReadSmapsDataInto(ctx context.Context, buf *bytes.Buffer) { 149 // FIXME(b/235153601): Need to replace RLockBypass with RLockBypass 150 // after fixing b/235153601. 151 mm.mappingMu.RLockBypass() 152 defer mm.mappingMu.RUnlockBypass() 153 var start hostarch.Addr 154 155 for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() { 156 mm.vmaSmapsEntryIntoLocked(ctx, vseg, buf) 157 } 158 159 // We always emulate vsyscall, so advertise it here. See 160 // ReadMapsSeqFileData for additional commentary. 161 if start != vsyscallEnd { 162 buf.WriteString(vsyscallSmapsEntry) 163 } 164 } 165 166 // vmaSmapsEntryLocked returns a /proc/[pid]/smaps entry for the vma iterated 167 // by vseg, including the trailing newline. 168 // 169 // Preconditions: mm.mappingMu must be locked. 170 func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte { 171 var b bytes.Buffer 172 mm.vmaSmapsEntryIntoLocked(ctx, vseg, &b) 173 return b.Bytes() 174 } 175 176 func (mm *MemoryManager) vmaSmapsEntryIntoLocked(ctx context.Context, vseg vmaIterator, b *bytes.Buffer) { 177 mm.appendVMAMapsEntryLocked(ctx, vseg, mm.MapsCallbackFuncForBuffer(b)) 178 vma := vseg.ValuePtr() 179 180 // We take mm.activeMu here in each call to vmaSmapsEntryLocked, instead of 181 // requiring it to be locked as a precondition, to reduce the latency 182 // impact of reading /proc/[pid]/smaps on concurrent performance-sensitive 183 // operations requiring activeMu for writing like faults. 184 mm.activeMu.RLock() 185 var rss uint64 186 var anon uint64 187 vsegAR := vseg.Range() 188 for pseg := mm.pmas.LowerBoundSegment(vsegAR.Start); pseg.Ok() && pseg.Start() < vsegAR.End; pseg = pseg.NextSegment() { 189 psegAR := pseg.Range().Intersect(vsegAR) 190 size := uint64(psegAR.Length()) 191 rss += size 192 if pseg.ValuePtr().private { 193 anon += size 194 } 195 } 196 mm.activeMu.RUnlock() 197 198 fmt.Fprintf(b, "Size: %8d kB\n", vseg.Range().Length()/1024) 199 fmt.Fprintf(b, "Rss: %8d kB\n", rss/1024) 200 // Currently we report PSS = RSS, i.e. we pretend each page mapped by a pma 201 // is only mapped by that pma. This avoids having to query memmap.Mappables 202 // for reference count information on each page. As a corollary, all pages 203 // are accounted as "private" whether or not the vma is private; compare 204 // Linux's fs/proc/task_mmu.c:smaps_account(). 205 fmt.Fprintf(b, "Pss: %8d kB\n", rss/1024) 206 fmt.Fprintf(b, "Shared_Clean: %8d kB\n", 0) 207 fmt.Fprintf(b, "Shared_Dirty: %8d kB\n", 0) 208 // Pretend that all pages are dirty if the vma is writable, and clean otherwise. 209 clean := rss 210 if vma.effectivePerms.Write { 211 clean = 0 212 } 213 fmt.Fprintf(b, "Private_Clean: %8d kB\n", clean/1024) 214 fmt.Fprintf(b, "Private_Dirty: %8d kB\n", (rss-clean)/1024) 215 // Pretend that all pages are "referenced" (recently touched). 216 fmt.Fprintf(b, "Referenced: %8d kB\n", rss/1024) 217 fmt.Fprintf(b, "Anonymous: %8d kB\n", anon/1024) 218 // Hugepages (hugetlb and THP) are not implemented. 219 fmt.Fprintf(b, "AnonHugePages: %8d kB\n", 0) 220 fmt.Fprintf(b, "Shared_Hugetlb: %8d kB\n", 0) 221 fmt.Fprintf(b, "Private_Hugetlb: %7d kB\n", 0) 222 // Swap is not implemented. 223 fmt.Fprintf(b, "Swap: %8d kB\n", 0) 224 fmt.Fprintf(b, "SwapPss: %8d kB\n", 0) 225 fmt.Fprintf(b, "KernelPageSize: %8d kB\n", hostarch.PageSize/1024) 226 fmt.Fprintf(b, "MMUPageSize: %8d kB\n", hostarch.PageSize/1024) 227 locked := rss 228 if vma.mlockMode == memmap.MLockNone { 229 locked = 0 230 } 231 fmt.Fprintf(b, "Locked: %8d kB\n", locked/1024) 232 233 b.WriteString("VmFlags: ") 234 if vma.realPerms.Read { 235 b.WriteString("rd ") 236 } 237 if vma.realPerms.Write { 238 b.WriteString("wr ") 239 } 240 if vma.realPerms.Execute { 241 b.WriteString("ex ") 242 } 243 if vma.canWriteMappableLocked() { // VM_SHARED 244 b.WriteString("sh ") 245 } 246 if vma.maxPerms.Read { 247 b.WriteString("mr ") 248 } 249 if vma.maxPerms.Write { 250 b.WriteString("mw ") 251 } 252 if vma.maxPerms.Execute { 253 b.WriteString("me ") 254 } 255 if !vma.private { // VM_MAYSHARE 256 b.WriteString("ms ") 257 } 258 if vma.growsDown { 259 b.WriteString("gd ") 260 } 261 if vma.mlockMode != memmap.MLockNone { // VM_LOCKED 262 b.WriteString("lo ") 263 } 264 if vma.mlockMode == memmap.MLockLazy { // VM_LOCKONFAULT 265 b.WriteString("?? ") // no explicit encoding in fs/proc/task_mmu.c:show_smap_vma_flags() 266 } 267 if vma.private && vma.effectivePerms.Write { // VM_ACCOUNT 268 b.WriteString("ac ") 269 } 270 b.WriteString("\n") 271 }