github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/pgalloc/save_restore.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pgalloc

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"runtime"

	"github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops"
	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
	"github.com/nicocha30/gvisor-ligolo/pkg/log"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/usage"
	"github.com/nicocha30/gvisor-ligolo/pkg/state"
	"github.com/nicocha30/gvisor-ligolo/pkg/state/wire"
	"golang.org/x/sys/unix"
)

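// The stream format produced by SaveTo and consumed by LoadFrom is, in
// order: f.fileSize and f.usage (both via the state package), then, for each
// segment with knownCommitted set, a state header carrying the segment's
// length followed by that segment's raw page contents.
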
// SaveTo writes f's state to the given stream.
func (f *MemoryFile) SaveTo(ctx context.Context, w wire.Writer) error {
	// Wait for any ongoing reclaim to finish: the reclaimer needs f.mu to
	// make progress, so release the lock and yield between checks.
	f.mu.Lock()
	defer f.mu.Unlock()
	for f.reclaimable {
		f.reclaimCond.Signal()
		f.mu.Unlock()
		runtime.Gosched()
		f.mu.Lock()
	}

	// Ensure that there are no pending evictions.
	if len(f.evictable) != 0 {
		panic(fmt.Sprintf("evictions still pending for %d users; call StartEvictions and WaitForEvictions before SaveTo", len(f.evictable)))
	}

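	// Zero pages need not be written out: LoadFrom recreates the backing
	// file with Truncate, so any range it does not explicitly fill reads
	// back as zeroes.
	//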
	// Ensure that all pages that contain data have knownCommitted set, since
	// we only store knownCommitted pages below.
	zeroPage := make([]byte, hostarch.PageSize)
	err := f.updateUsageLocked(0, func(bs []byte, committed []byte) error {
		for pgoff := 0; pgoff < len(bs); pgoff += hostarch.PageSize {
			i := pgoff / hostarch.PageSize
			pg := bs[pgoff : pgoff+hostarch.PageSize]
			if !bytes.Equal(pg, zeroPage) {
				committed[i] = 1
				continue
			}
			committed[i] = 0
			// Reading the page caused it to be committed; decommit it to
			// reduce memory usage.
			//
			// "MADV_REMOVE [...] Free up a given range of pages and its
			// associated backing store. This is equivalent to punching a hole
			// in the corresponding byte range of the backing store (see
			// fallocate(2))." - madvise(2)
			if err := unix.Madvise(pg, unix.MADV_REMOVE); err != nil {
				// This doesn't impact the correctness of saved memory; it
				// just makes us incrementally more likely to OOM. Complain,
				// but don't abort saving.
				log.Warningf("Decommitting page %p while saving failed: %v", pg, err)
			}
		}
		return nil
	})
	if err != nil {
		return err
	}

	// Save metadata.
	if _, err := state.Save(ctx, w, &f.fileSize); err != nil {
		return err
	}
	if _, err := state.Save(ctx, w, &f.usage); err != nil {
		return err
	}

	// Dump out committed pages.
	for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
		if !seg.Value().knownCommitted {
			continue
		}
		// Write a header to distinguish the raw page data from objects;
		// LoadFrom verifies the length and object flag on the other side.
		if err := state.WriteHeader(w, uint64(seg.Range().Length()), false); err != nil {
			return err
		}
		// Write out data.
		var ioErr error
		err := f.forEachMappingSlice(seg.Range(), func(s []byte) {
			if ioErr != nil {
				return
			}
			_, ioErr = w.Write(s)
		})
		if ioErr != nil {
			return ioErr
		}
		if err != nil {
			return err
		}
	}

	return nil
}

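// A save sequence satisfying SaveTo's eviction precondition might look like
// the following sketch (callers live outside this package; error handling
// elided):
//
//	mf.StartEvictions()
//	mf.WaitForEvictions()
//	if err := mf.SaveTo(ctx, w); err != nil {
//		// handle error
//	}
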
// LoadFrom loads MemoryFile state from the given stream.
func (f *MemoryFile) LoadFrom(ctx context.Context, r wire.Reader) error {
	// Load metadata.
	if _, err := state.Load(ctx, r, &f.fileSize); err != nil {
		return err
	}
	if err := f.file.Truncate(f.fileSize); err != nil {
		return err
	}
	newMappings := make([]uintptr, f.fileSize>>chunkShift)
	f.mappings.Store(newMappings)
	if _, err := state.Load(ctx, r, &f.usage); err != nil {
		return err
	}

	// Try to map committed chunks concurrently: For any given chunk, either
	// this loop or the following one will mmap the chunk first and cache it in
	// f.mappings for the other, but this loop is likely to run ahead of the
	// other since it doesn't do any work between mmaps. The rest of this
	// function doesn't mutate f.usage, so it's safe to iterate concurrently.
	mapperDone := make(chan struct{})
	mapperCanceled := atomicbitops.FromInt32(0)
	go func() { // S/R-SAFE: see comment
		defer close(mapperDone)
		for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
			if mapperCanceled.Load() != 0 {
				return
			}
			if seg.Value().knownCommitted {
				f.forEachMappingSlice(seg.Range(), func(s []byte) {})
			}
		}
	}()
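	// If LoadFrom returns early (e.g. on an I/O error below), stop the
	// background mapper and wait for it to exit before returning.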
	defer func() {
		mapperCanceled.Store(1)
		<-mapperDone
	}()

	// Load committed pages.
	for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
		if !seg.Value().knownCommitted {
			continue
		}
		// Verify header.
		length, object, err := state.ReadHeader(r)
		if err != nil {
			return err
		}
		if object {
			// Not expected.
			return fmt.Errorf("unexpected object")
		}
		if expected := uint64(seg.Range().Length()); length != expected {
			// Size mismatch.
			return fmt.Errorf("mismatched segment: expected %d, got %d", expected, length)
		}
		// Read data.
		var ioErr error
		err = f.forEachMappingSlice(seg.Range(), func(s []byte) {
			if ioErr != nil {
				return
			}
			_, ioErr = io.ReadFull(r, s)
		})
		if ioErr != nil {
			return ioErr
		}
		if err != nil {
			return err
		}

		// Update accounting for restored pages. We need to do this here since
		// these segments are marked as "known committed" and will be skipped
		// by accounting scans.
		usage.MemoryAccounting.Inc(seg.End()-seg.Start(), seg.Value().kind, seg.Value().memCgID)
	}

	return nil
}

// MemoryFileProvider provides the MemoryFile method.
//
// This type exists to work around a save/restore defect. The only object in a
// saved object graph that S/R allows to be replaced at time of restore is the
// starting point of the restore, kernel.Kernel. However, the MemoryFile
// changes between save and restore as well, so objects that need persistent
// access to the MemoryFile must instead store a pointer to the Kernel and
// call Kernel.MemoryFile() as required. In most cases, depending on the
// kernel package directly would create a package dependency loop, so the
// stored pointer must instead be a MemoryFileProvider interface object.
// Correspondingly, kernel.Kernel is the only implementation of this
// interface.
type MemoryFileProvider interface {
	// MemoryFile returns the Kernel's MemoryFile.
	MemoryFile() *MemoryFile
}
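
// For illustration only, a hypothetical client of this interface might look
// like the following (the names here are invented for the example and are
// not part of this package):
//
//	type fileUser struct {
//		mfp MemoryFileProvider // in practice, the kernel.Kernel
//	}
//
//	func (u *fileUser) mf() *MemoryFile {
//		// Re-resolve on every use: the MemoryFile in effect after restore
//		// is not the one that existed before save.
//		return u.mfp.MemoryFile()
//	}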