github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/hostmm/hostmm.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package hostmm provides tools for interacting with the host Linux kernel's
    16  // virtual memory management subsystem.
    17  package hostmm
    18  
    19  import (
    20  	"fmt"
    21  	"os"
    22  	"path"
    23  
    24  	"github.com/metacubex/gvisor/pkg/eventfd"
    25  	"github.com/metacubex/gvisor/pkg/log"
    26  )
    27  
    28  // NotifyCurrentMemcgPressureCallback requests that f is called whenever the
    29  // calling process' memory cgroup indicates memory pressure of the given level,
    30  // as specified by Linux's Documentation/cgroup-v1/memory.txt.
    31  //
    32  // If NotifyCurrentMemcgPressureCallback succeeds, it returns a function that
    33  // terminates the requested memory pressure notifications. This function may be
    34  // called at most once.
    35  func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error) {
    36  	cgdir, err := currentCgroupDirectory("memory")
    37  	if err != nil {
    38  		return nil, err
    39  	}
    40  
    41  	pressurePath := path.Join(cgdir, "memory.pressure_level")
    42  	pressureFile, err := os.Open(pressurePath)
    43  	if err != nil {
    44  		return nil, err
    45  	}
    46  	defer pressureFile.Close()
    47  
    48  	eventControlPath := path.Join(cgdir, "cgroup.event_control")
    49  	eventControlFile, err := os.OpenFile(eventControlPath, os.O_WRONLY, 0)
    50  	if err != nil {
    51  		return nil, err
    52  	}
    53  	defer eventControlFile.Close()
    54  
    55  	eventFD, err := eventfd.Create()
    56  	if err != nil {
    57  		return nil, err
    58  	}
    59  
    60  	// Don't use fmt.Fprintf since the whole string needs to be written in a
    61  	// single unix.
    62  	eventControlStr := fmt.Sprintf("%d %d %s", eventFD.FD(), pressureFile.Fd(), level)
    63  	if n, err := eventControlFile.Write([]byte(eventControlStr)); n != len(eventControlStr) || err != nil {
    64  		eventFD.Close()
    65  		return nil, fmt.Errorf("error writing %q to %s: got (%d, %v), wanted (%d, nil)", eventControlStr, eventControlPath, n, err, len(eventControlStr))
    66  	}
    67  
    68  	log.Debugf("Receiving memory pressure level notifications from %s at level %q", pressurePath, level)
    69  	const sizeofUint64 = 8
    70  	// The most significant bit of the eventfd value is set by the stop
    71  	// function, which is practically unambiguous since it's not plausible for
    72  	// 2**63 pressure events to occur between eventfd reads.
    73  	const stopVal = 1 << 63
    74  	stopCh := make(chan struct{})
    75  	go func() { // S/R-SAFE: f provides synchronization if necessary
    76  		for {
    77  			val, err := eventFD.Read()
    78  			if err != nil {
    79  				panic(fmt.Sprintf("failed to read from memory pressure level eventfd: %v", err))
    80  			}
    81  			if val >= stopVal {
    82  				// Assume this was due to the notifier's "destructor" (the
    83  				// function returned by NotifyCurrentMemcgPressureCallback
    84  				// below) being called.
    85  				eventFD.Close()
    86  				close(stopCh)
    87  				return
    88  			}
    89  			f()
    90  		}
    91  	}()
    92  	return func() {
    93  		eventFD.Write(stopVal)
    94  		<-stopCh
    95  	}, nil
    96  }