gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/mm/procfs.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mm

import (
	"bytes"
	"fmt"

	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/hostarch"
	"gvisor.dev/gvisor/pkg/log"
	"gvisor.dev/gvisor/pkg/sentry/memmap"
)

const (
	// devMinorBits is the number of minor bits in a device number. Linux:
	// include/linux/kdev_t.h:MINORBITS
	devMinorBits = 20

	vsyscallEnd        = hostarch.Addr(0xffffffffff601000)
	vsyscallMapsEntry  = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n"
	vsyscallSmapsEntry = vsyscallMapsEntry +
		"Size:                  4 kB\n" +
		"Rss:                   0 kB\n" +
		"Pss:                   0 kB\n" +
		"Shared_Clean:          0 kB\n" +
		"Shared_Dirty:          0 kB\n" +
		"Private_Clean:         0 kB\n" +
		"Private_Dirty:         0 kB\n" +
		"Referenced:            0 kB\n" +
		"Anonymous:             0 kB\n" +
		"AnonHugePages:         0 kB\n" +
		"Shared_Hugetlb:        0 kB\n" +
		"Private_Hugetlb:       0 kB\n" +
		"Swap:                  0 kB\n" +
		"SwapPss:               0 kB\n" +
		"KernelPageSize:        4 kB\n" +
		"MMUPageSize:           4 kB\n" +
		"Locked:                0 kB\n" +
		"VmFlags: rd ex \n"
)

// MapsCallbackFuncForBuffer returns a MapsCallbackFunc that writes a
// /proc/[pid]/maps entry, including the trailing newline, to buf.
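//
// A produced entry follows Linux's maps format, with the path (when present)
// padded to begin at the 74th character; illustrative values only:
//
//	00400000-004e5000 r-xp 00000000 08:01 282168                             /bin/bash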
func (mm *MemoryManager) MapsCallbackFuncForBuffer(buf *bytes.Buffer) MapsCallbackFunc {
	return func(start, end hostarch.Addr, permissions hostarch.AccessType, private string, offset uint64, devMajor, devMinor uint32, inode uint64, path string) {
		// Do not include the guard page: fs/proc/task_mmu.c:show_map_vma() =>
		// stack_guard_page_start().
		lineLen, err := fmt.Fprintf(buf, "%08x-%08x %s%s %08x %02x:%02x %d ",
			start, end, permissions, private, offset, devMajor, devMinor, inode)
		if err != nil {
			log.Warningf("Failed to write to buffer with error: %v", err)
			return
		}

		if path != "" {
			// Per Linux, pad the line so that the path begins at the 74th
			// character.
			for pad := 73 - lineLen; pad > 0; pad-- {
				buf.WriteByte(' ') // never returns a non-nil error
			}
			buf.WriteString(path) // never returns a non-nil error
		}
		buf.WriteByte('\n') // never returns a non-nil error
	}
}

// ReadMapsDataInto is called by fsimpl/proc.mapsData.Generate to
// implement /proc/[pid]/maps.
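//
// A minimal usage sketch (hypothetical caller, for illustration only):
//
//	var buf bytes.Buffer
//	mm.ReadMapsDataInto(ctx, mm.MapsCallbackFuncForBuffer(&buf))
//	// buf now holds every vma entry plus the trailing [vsyscall] entry.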
func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, fn MapsCallbackFunc) {
	// FIXME(b/235153601): Need to replace RLockBypass with RLock after
	// fixing b/235153601.
	mm.mappingMu.RLockBypass()
	defer mm.mappingMu.RUnlockBypass()
	var start hostarch.Addr

	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
		mm.appendVMAMapsEntryLocked(ctx, vseg, fn)
	}

	// We always emulate vsyscall, so advertise it here. Everything about a
	// vsyscall region is static, so just hard code the maps entry since we
	// don't have a real vma backing it. The vsyscall region is at the end of
	// the virtual address space so nothing should be mapped after it (if
	// something is really mapped in the tiny ~10 MiB segment afterwards, we'll
	// get the sorting on the maps file wrong at worst; but that's not possible
	// on any current platform).
	//
	// The start != vsyscallEnd check is a holdover from the old seqfile-based
	// implementation, in which reads could resume from a handle; here start
	// never reaches vsyscallEnd, so the entry is always emitted exactly once.
	if start != vsyscallEnd {
		fn(hostarch.Addr(0xffffffffff600000), hostarch.Addr(0xffffffffff601000), hostarch.ReadExecute, "p", 0, 0, 0, 0, "[vsyscall]")
	}
}

// vmaMapsEntryLocked returns a /proc/[pid]/maps entry for the vma iterated by
// vseg, including the trailing newline.
//
// Preconditions: mm.mappingMu must be locked.
func (mm *MemoryManager) vmaMapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
	var b bytes.Buffer
	mm.appendVMAMapsEntryLocked(ctx, vseg, mm.MapsCallbackFuncForBuffer(&b))
	return b.Bytes()
}

// Preconditions: mm.mappingMu must be locked.
func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaIterator, fn MapsCallbackFunc) {
	vma := vseg.ValuePtr()
	private := "p"
	if !vma.private {
		private = "s"
	}

	var dev, ino uint64
	if vma.id != nil {
		dev = vma.id.DeviceID()
		ino = vma.id.InodeID()
	}
	devMajor := uint32(dev >> devMinorBits)
	devMinor := uint32(dev & ((1 << devMinorBits) - 1))
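	// For example, a DeviceID() of 0x00800011 (major 8, minor 0x11 in
	// Linux's dev_t encoding) splits as devMajor = 0x00800011 >> 20 = 8
	// and devMinor = 0x00800011 & 0xfffff = 0x11.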

	// Figure out our filename or hint.
	var path string
	if vma.hint != "" {
		path = vma.hint
	} else if vma.id != nil {
		// FIXME(jamieliu): We are holding mm.mappingMu here, which is
		// consistent with Linux's holding mmap_sem in
		// fs/proc/task_mmu.c:show_map_vma() => fs/seq_file.c:seq_file_path().
		// However, it's not clear that fs.File.MappedName() is actually
		// consistent with this lock order.
		path = vma.id.MappedName(ctx)
	}
	fn(vseg.Start(), vseg.End(), vma.realPerms, private, vma.off, devMajor, devMinor, ino, path)
}

// ReadSmapsDataInto is called by fsimpl/proc.smapsData.Generate to
// implement /proc/[pid]/smaps.
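//
// A minimal usage sketch (hypothetical caller, for illustration only):
//
//	var buf bytes.Buffer
//	mm.ReadSmapsDataInto(ctx, &buf)
//	// buf now holds a maps-style header plus stat fields for each vma.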
func (mm *MemoryManager) ReadSmapsDataInto(ctx context.Context, buf *bytes.Buffer) {
	// FIXME(b/235153601): Need to replace RLockBypass with RLock after
	// fixing b/235153601.
	mm.mappingMu.RLockBypass()
	defer mm.mappingMu.RUnlockBypass()
	var start hostarch.Addr

	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
		mm.vmaSmapsEntryIntoLocked(ctx, vseg, buf)
	}

	// We always emulate vsyscall, so advertise it here. See
	// ReadMapsDataInto for additional commentary.
	if start != vsyscallEnd {
		buf.WriteString(vsyscallSmapsEntry)
	}
}

// vmaSmapsEntryLocked returns a /proc/[pid]/smaps entry for the vma iterated
// by vseg, including the trailing newline.
//
// Preconditions: mm.mappingMu must be locked.
func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
	var b bytes.Buffer
	mm.vmaSmapsEntryIntoLocked(ctx, vseg, &b)
	return b.Bytes()
}

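// Preconditions: mm.mappingMu must be locked.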
func (mm *MemoryManager) vmaSmapsEntryIntoLocked(ctx context.Context, vseg vmaIterator, b *bytes.Buffer) {
	mm.appendVMAMapsEntryLocked(ctx, vseg, mm.MapsCallbackFuncForBuffer(b))
	vma := vseg.ValuePtr()

	// We take mm.activeMu here in each call to vmaSmapsEntryIntoLocked,
	// instead of requiring it to be locked as a precondition, to reduce the
	// latency impact of reading /proc/[pid]/smaps on concurrent
	// performance-sensitive operations requiring activeMu for writing, like
	// faults.
	mm.activeMu.RLock()
	var rss uint64
	var anon uint64
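	// Sum, over each pma overlapping this vma, the bytes it maps (rss) and
	// the bytes backed by private memory (anon, reported as Anonymous).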
	vsegAR := vseg.Range()
	for pseg := mm.pmas.LowerBoundSegment(vsegAR.Start); pseg.Ok() && pseg.Start() < vsegAR.End; pseg = pseg.NextSegment() {
		psegAR := pseg.Range().Intersect(vsegAR)
		size := uint64(psegAR.Length())
		rss += size
		if pseg.ValuePtr().private {
			anon += size
		}
	}
	mm.activeMu.RUnlock()

	fmt.Fprintf(b, "Size:           %8d kB\n", vseg.Range().Length()/1024)
	fmt.Fprintf(b, "Rss:            %8d kB\n", rss/1024)
	// Currently we report PSS = RSS, i.e. we pretend each page mapped by a pma
	// is only mapped by that pma. This avoids having to query memmap.Mappables
	// for reference count information on each page. As a corollary, all pages
	// are accounted as "private" whether or not the vma is private; compare
	// Linux's fs/proc/task_mmu.c:smaps_account().
	fmt.Fprintf(b, "Pss:            %8d kB\n", rss/1024)
	fmt.Fprintf(b, "Shared_Clean:   %8d kB\n", 0)
	fmt.Fprintf(b, "Shared_Dirty:   %8d kB\n", 0)
	// Pretend that all pages are dirty if the vma is writable, and clean otherwise.
	clean := rss
	if vma.effectivePerms.Write {
		clean = 0
	}
	fmt.Fprintf(b, "Private_Clean:  %8d kB\n", clean/1024)
	fmt.Fprintf(b, "Private_Dirty:  %8d kB\n", (rss-clean)/1024)
	// Pretend that all pages are "referenced" (recently touched).
	fmt.Fprintf(b, "Referenced:     %8d kB\n", rss/1024)
	fmt.Fprintf(b, "Anonymous:      %8d kB\n", anon/1024)
	// Hugepages (hugetlb and THP) are not implemented.
	fmt.Fprintf(b, "AnonHugePages:  %8d kB\n", 0)
	fmt.Fprintf(b, "Shared_Hugetlb: %8d kB\n", 0)
	fmt.Fprintf(b, "Private_Hugetlb: %7d kB\n", 0)
	// Swap is not implemented.
	fmt.Fprintf(b, "Swap:           %8d kB\n", 0)
	fmt.Fprintf(b, "SwapPss:        %8d kB\n", 0)
	fmt.Fprintf(b, "KernelPageSize: %8d kB\n", hostarch.PageSize/1024)
	fmt.Fprintf(b, "MMUPageSize:    %8d kB\n", hostarch.PageSize/1024)
	locked := rss
	if vma.mlockMode == memmap.MLockNone {
		locked = 0
	}
	fmt.Fprintf(b, "Locked:         %8d kB\n", locked/1024)

	b.WriteString("VmFlags: ")
	if vma.realPerms.Read {
		b.WriteString("rd ")
	}
	if vma.realPerms.Write {
		b.WriteString("wr ")
	}
	if vma.realPerms.Execute {
		b.WriteString("ex ")
	}
	if vma.canWriteMappableLocked() { // VM_SHARED
		b.WriteString("sh ")
	}
	if vma.maxPerms.Read {
		b.WriteString("mr ")
	}
	if vma.maxPerms.Write {
		b.WriteString("mw ")
	}
	if vma.maxPerms.Execute {
		b.WriteString("me ")
	}
	if !vma.private { // VM_MAYSHARE
		b.WriteString("ms ")
	}
	if vma.growsDown {
		b.WriteString("gd ")
	}
	if vma.mlockMode != memmap.MLockNone { // VM_LOCKED
		b.WriteString("lo ")
	}
	if vma.mlockMode == memmap.MLockLazy { // VM_LOCKONFAULT
		b.WriteString("?? ") // no explicit encoding in fs/proc/task_mmu.c:show_smap_vma_flags()
	}
	if vma.private && vma.effectivePerms.Write { // VM_ACCOUNT
		b.WriteString("ac ")
	}
	b.WriteString("\n")
}