github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/mm/procfs.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mm

import (
	"bytes"
	"fmt"

	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/sentry/fs/proc/seqfile"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
)

const (
	// devMinorBits is the number of minor bits in a device number. Linux:
	// include/linux/kdev_t.h:MINORBITS
	devMinorBits = 20

	vsyscallEnd        = hostarch.Addr(0xffffffffff601000)
	vsyscallMapsEntry  = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n"
	vsyscallSmapsEntry = vsyscallMapsEntry +
		"Size:                  4 kB\n" +
		"Rss:                   0 kB\n" +
		"Pss:                   0 kB\n" +
		"Shared_Clean:          0 kB\n" +
		"Shared_Dirty:          0 kB\n" +
		"Private_Clean:         0 kB\n" +
		"Private_Dirty:         0 kB\n" +
		"Referenced:            0 kB\n" +
		"Anonymous:             0 kB\n" +
		"AnonHugePages:         0 kB\n" +
		"Shared_Hugetlb:        0 kB\n" +
		"Private_Hugetlb:       0 kB\n" +
		"Swap:                  0 kB\n" +
		"SwapPss:               0 kB\n" +
		"KernelPageSize:        4 kB\n" +
		"MMUPageSize:           4 kB\n" +
		"Locked:                0 kB\n" +
		"VmFlags: rd ex \n"
)

// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
func (mm *MemoryManager) NeedsUpdate(generation int64) bool {
	return true
}

// ReadMapsDataInto is called by fsimpl/proc.mapsData.Generate to
// implement /proc/[pid]/maps.
func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, buf *bytes.Buffer) {
	mm.mappingMu.RLock()
	defer mm.mappingMu.RUnlock()
	var start hostarch.Addr

	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
		mm.appendVMAMapsEntryLocked(ctx, vseg, buf)
	}

	// We always emulate vsyscall, so advertise it here. Everything about a
	// vsyscall region is static, so just hard code the maps entry since we
	// don't have a real vma backing it. The vsyscall region is at the end of
	// the virtual address space so nothing should be mapped after it (if
	// something is really mapped in the tiny ~10 MiB segment afterwards, we'll
	// get the sorting on the maps file wrong at worst; but that's not possible
	// on any current platform).
	//
	// Artificially adjust the seqfile handle so we only output the vsyscall
	// entry once.
	if start != vsyscallEnd {
		buf.WriteString(vsyscallMapsEntry)
	}
}
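
// Illustrative only (not part of the original file): a caller along the lines
// of the fsimpl/proc.mapsData.Generate named in the doc comment above would be
// expected to drive ReadMapsDataInto roughly as follows:
//
//	var buf bytes.Buffer
//	mm.ReadMapsDataInto(ctx, &buf)
//	// buf now holds one maps line per vma, followed by the hard-coded
//	// [vsyscall] entry.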

// ReadMapsSeqFileData is called by fs/proc.mapsData.ReadSeqFileData to
// implement /proc/[pid]/maps.
func (mm *MemoryManager) ReadMapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
	mm.mappingMu.RLock()
	defer mm.mappingMu.RUnlock()
	var data []seqfile.SeqData
	var start hostarch.Addr
	if handle != nil {
		start = *handle.(*hostarch.Addr)
	}
	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
		vmaAddr := vseg.End()
		data = append(data, seqfile.SeqData{
			Buf:    mm.vmaMapsEntryLocked(ctx, vseg),
			Handle: &vmaAddr,
		})
	}

	// We always emulate vsyscall, so advertise it here. Everything about a
	// vsyscall region is static, so just hard code the maps entry since we
	// don't have a real vma backing it. The vsyscall region is at the end of
	// the virtual address space so nothing should be mapped after it (if
	// something is really mapped in the tiny ~10 MiB segment afterwards, we'll
	// get the sorting on the maps file wrong at worst; but that's not possible
	// on any current platform).
	//
	// Artificially adjust the seqfile handle so we only output the vsyscall
	// entry once.
	if start != vsyscallEnd {
		vmaAddr := vsyscallEnd
		data = append(data, seqfile.SeqData{
			Buf:    []byte(vsyscallMapsEntry),
			Handle: &vmaAddr,
		})
	}
	return data, 1
}

// vmaMapsEntryLocked returns a /proc/[pid]/maps entry for the vma iterated by
// vseg, including the trailing newline.
//
// Preconditions: mm.mappingMu must be locked.
func (mm *MemoryManager) vmaMapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
	var b bytes.Buffer
	mm.appendVMAMapsEntryLocked(ctx, vseg, &b)
	return b.Bytes()
}

// Preconditions: mm.mappingMu must be locked.
func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaIterator, b *bytes.Buffer) {
	vma := vseg.ValuePtr()
	private := "p"
	if !vma.private {
		private = "s"
	}

	var dev, ino uint64
	if vma.id != nil {
		dev = vma.id.DeviceID()
		ino = vma.id.InodeID()
	}
	devMajor := uint32(dev >> devMinorBits)
	devMinor := uint32(dev & ((1 << devMinorBits) - 1))

	// Do not include the guard page: fs/proc/task_mmu.c:show_map_vma() =>
	// stack_guard_page_start().
	lineLen, _ := fmt.Fprintf(b, "%08x-%08x %s%s %08x %02x:%02x %d ",
		vseg.Start(), vseg.End(), vma.realPerms, private, vma.off, devMajor, devMinor, ino)

	// Figure out our filename or hint.
	var s string
	if vma.hint != "" {
		s = vma.hint
	} else if vma.id != nil {
		// FIXME(jamieliu): We are holding mm.mappingMu here, which is
		// consistent with Linux's holding mmap_sem in
		// fs/proc/task_mmu.c:show_map_vma() => fs/seq_file.c:seq_file_path().
		// However, it's not clear that fs.File.MappedName() is actually
		// consistent with this lock order.
		s = vma.id.MappedName(ctx)
	}
	if s != "" {
		// Per Linux, we pad until the 74th character.
		for pad := 73 - lineLen; pad > 0; pad-- {
			b.WriteByte(' ')
		}
		b.WriteString(s)
	}
	b.WriteByte('\n')
}
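
// Illustrative only (not part of the original file): given the format string
// above, a hypothetical private anonymous read-write vma spanning
// 0x400000-0x401000 with no backing file would render roughly as
//
//	00400000-00401000 rw-p 00000000 00:00 0
//
// while a named mapping additionally has its name padded out to begin at the
// 74th column, as in the hard-coded vsyscallMapsEntry constant above.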

// ReadSmapsDataInto is called by fsimpl/proc.smapsData.Generate to
// implement /proc/[pid]/smaps.
func (mm *MemoryManager) ReadSmapsDataInto(ctx context.Context, buf *bytes.Buffer) {
	mm.mappingMu.RLock()
	defer mm.mappingMu.RUnlock()
	var start hostarch.Addr

	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
		mm.vmaSmapsEntryIntoLocked(ctx, vseg, buf)
	}

	// We always emulate vsyscall, so advertise it here. See
	// ReadMapsSeqFileData for additional commentary.
	if start != vsyscallEnd {
		buf.WriteString(vsyscallSmapsEntry)
	}
}

// ReadSmapsSeqFileData is called by fs/proc.smapsData.ReadSeqFileData to
// implement /proc/[pid]/smaps.
func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
	mm.mappingMu.RLock()
	defer mm.mappingMu.RUnlock()
	var data []seqfile.SeqData
	var start hostarch.Addr
	if handle != nil {
		start = *handle.(*hostarch.Addr)
	}
	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
		vmaAddr := vseg.End()
		data = append(data, seqfile.SeqData{
			Buf:    mm.vmaSmapsEntryLocked(ctx, vseg),
			Handle: &vmaAddr,
		})
	}

	// We always emulate vsyscall, so advertise it here. See
	// ReadMapsSeqFileData for additional commentary.
	if start != vsyscallEnd {
		vmaAddr := vsyscallEnd
		data = append(data, seqfile.SeqData{
			Buf:    []byte(vsyscallSmapsEntry),
			Handle: &vmaAddr,
		})
	}
	return data, 1
}

// vmaSmapsEntryLocked returns a /proc/[pid]/smaps entry for the vma iterated
// by vseg, including the trailing newline.
//
// Preconditions: mm.mappingMu must be locked.
func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
	var b bytes.Buffer
	mm.vmaSmapsEntryIntoLocked(ctx, vseg, &b)
	return b.Bytes()
}

// Preconditions: mm.mappingMu must be locked.
func (mm *MemoryManager) vmaSmapsEntryIntoLocked(ctx context.Context, vseg vmaIterator, b *bytes.Buffer) {
	mm.appendVMAMapsEntryLocked(ctx, vseg, b)
	vma := vseg.ValuePtr()

	// We take mm.activeMu here in each call to vmaSmapsEntryIntoLocked,
	// instead of requiring it to be locked as a precondition, to reduce the
	// latency impact of reading /proc/[pid]/smaps on concurrent
	// performance-sensitive operations requiring activeMu for writing like
	// faults.
	mm.activeMu.RLock()
	var rss uint64
	var anon uint64
	vsegAR := vseg.Range()
	for pseg := mm.pmas.LowerBoundSegment(vsegAR.Start); pseg.Ok() && pseg.Start() < vsegAR.End; pseg = pseg.NextSegment() {
		psegAR := pseg.Range().Intersect(vsegAR)
		size := uint64(psegAR.Length())
		rss += size
		if pseg.ValuePtr().private {
			anon += size
		}
	}
	mm.activeMu.RUnlock()

	fmt.Fprintf(b, "Size:           %8d kB\n", vseg.Range().Length()/1024)
	fmt.Fprintf(b, "Rss:            %8d kB\n", rss/1024)
	// Currently we report PSS = RSS, i.e. we pretend each page mapped by a pma
	// is only mapped by that pma. This avoids having to query memmap.Mappables
	// for reference count information on each page. As a corollary, all pages
	// are accounted as "private" whether or not the vma is private; compare
	// Linux's fs/proc/task_mmu.c:smaps_account().
	fmt.Fprintf(b, "Pss:            %8d kB\n", rss/1024)
	fmt.Fprintf(b, "Shared_Clean:   %8d kB\n", 0)
	fmt.Fprintf(b, "Shared_Dirty:   %8d kB\n", 0)
	// Pretend that all pages are dirty if the vma is writable, and clean otherwise.
	clean := rss
	if vma.effectivePerms.Write {
		clean = 0
	}
	fmt.Fprintf(b, "Private_Clean:  %8d kB\n", clean/1024)
	fmt.Fprintf(b, "Private_Dirty:  %8d kB\n", (rss-clean)/1024)
	// Pretend that all pages are "referenced" (recently touched).
	fmt.Fprintf(b, "Referenced:     %8d kB\n", rss/1024)
	fmt.Fprintf(b, "Anonymous:      %8d kB\n", anon/1024)
	// Hugepages (hugetlb and THP) are not implemented.
	fmt.Fprintf(b, "AnonHugePages:  %8d kB\n", 0)
	fmt.Fprintf(b, "Shared_Hugetlb: %8d kB\n", 0)
	fmt.Fprintf(b, "Private_Hugetlb: %7d kB\n", 0)
	// Swap is not implemented.
	fmt.Fprintf(b, "Swap:           %8d kB\n", 0)
	fmt.Fprintf(b, "SwapPss:        %8d kB\n", 0)
	fmt.Fprintf(b, "KernelPageSize: %8d kB\n", hostarch.PageSize/1024)
	fmt.Fprintf(b, "MMUPageSize:    %8d kB\n", hostarch.PageSize/1024)
	locked := rss
	if vma.mlockMode == memmap.MLockNone {
		locked = 0
	}
	fmt.Fprintf(b, "Locked:         %8d kB\n", locked/1024)

	b.WriteString("VmFlags: ")
	if vma.realPerms.Read {
		b.WriteString("rd ")
	}
	if vma.realPerms.Write {
		b.WriteString("wr ")
	}
	if vma.realPerms.Execute {
		b.WriteString("ex ")
	}
	if vma.canWriteMappableLocked() { // VM_SHARED
		b.WriteString("sh ")
	}
	if vma.maxPerms.Read {
		b.WriteString("mr ")
	}
	if vma.maxPerms.Write {
		b.WriteString("mw ")
	}
	if vma.maxPerms.Execute {
		b.WriteString("me ")
	}
	if !vma.private { // VM_MAYSHARE
		b.WriteString("ms ")
	}
	if vma.growsDown {
		b.WriteString("gd ")
	}
	if vma.mlockMode != memmap.MLockNone { // VM_LOCKED
		b.WriteString("lo ")
	}
	if vma.mlockMode == memmap.MLockLazy { // VM_LOCKONFAULT
		b.WriteString("?? ") // no explicit encoding in fs/proc/task_mmu.c:show_smap_vma_flags()
	}
	if vma.private && vma.effectivePerms.Write { // VM_ACCOUNT
		b.WriteString("ac ")
	}
	b.WriteString("\n")
}
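
// Illustrative only (not part of the original file): a fs/proc seqfile
// consumer would be expected to resume iteration by handing back the Handle
// from the last SeqData it consumed, roughly as follows (variable names are
// assumptions for the sketch):
//
//	data, _ := mm.ReadMapsSeqFileData(ctx, nil)  // first read starts at address 0
//	last := data[len(data)-1].Handle             // *hostarch.Addr: end of the last entry emitted
//	more, _ := mm.ReadMapsSeqFileData(ctx, last) // resumes from that address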