github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/pgalloc/save_restore.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pgalloc

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"runtime"

	"github.com/metacubex/gvisor/pkg/atomicbitops"
	"github.com/metacubex/gvisor/pkg/hostarch"
	"github.com/metacubex/gvisor/pkg/log"
	"github.com/metacubex/gvisor/pkg/sentry/usage"
	"github.com/metacubex/gvisor/pkg/state"
	"github.com/metacubex/gvisor/pkg/state/wire"
	"golang.org/x/sys/unix"
)

// SaveTo writes f's state to the given stream.
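//
// There must be no pending evictions when SaveTo is called; per the panic
// below, callers are expected to run StartEvictions and WaitForEvictions
// first. A minimal sketch of that ordering (the surrounding orchestration is
// illustrative only, not part of this package):
//
//	f.StartEvictions()
//	f.WaitForEvictions()
//	if err := f.SaveTo(ctx, w); err != nil {
//		// handle the save error
//	}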
func (f *MemoryFile) SaveTo(ctx context.Context, w wire.Writer) error {
	// Wait for reclaim.
	f.mu.Lock()
	defer f.mu.Unlock()
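	// Kick the reclaimer, then briefly drop f.mu and yield so that it can
	// make progress; re-check f.reclaimable once the lock is reacquired.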
	for f.reclaimable {
		f.reclaimCond.Signal()
		f.mu.Unlock()
		runtime.Gosched()
		f.mu.Lock()
	}

	// Ensure that there are no pending evictions.
	if len(f.evictable) != 0 {
		panic(fmt.Sprintf("evictions still pending for %d users; call StartEvictions and WaitForEvictions before SaveTo", len(f.evictable)))
	}

	// Ensure that all pages that contain data have knownCommitted set, since
	// we only store knownCommitted pages below.
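	// Pages that compare equal to the zero page are treated as uncommitted
	// and are decommitted below, so that scanning them here does not leave
	// them resident.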
	zeroPage := make([]byte, hostarch.PageSize)
	err := f.updateUsageLocked(0, nil, func(bs []byte, committed []byte) error {
		for pgoff := 0; pgoff < len(bs); pgoff += hostarch.PageSize {
			i := pgoff / hostarch.PageSize
			pg := bs[pgoff : pgoff+hostarch.PageSize]
			if !bytes.Equal(pg, zeroPage) {
				committed[i] = 1
				continue
			}
			committed[i] = 0
			// Reading the page caused it to be committed; decommit it to
			// reduce memory usage.
			//
			// "MADV_REMOVE [...] Free up a given range of pages and its
			// associated backing store. This is equivalent to punching a hole
			// in the corresponding byte range of the backing store (see
			// fallocate(2))." - madvise(2)
			if err := unix.Madvise(pg, unix.MADV_REMOVE); err != nil {
				// This doesn't impact the correctness of saved memory, it
				// just means that we're incrementally more likely to OOM.
				// Complain, but don't abort saving.
				log.Warningf("Decommitting page %p while saving failed: %v", pg, err)
			}
		}
		return nil
	})
	if err != nil {
		return err
	}

	// Save metadata.
	if _, err := state.Save(ctx, w, &f.fileSize); err != nil {
		return err
	}
	if _, err := state.Save(ctx, w, &f.usage); err != nil {
		return err
	}

	// Dump out committed pages.
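	// For each knownCommitted segment, write a non-object header carrying the
	// segment's length in bytes, followed by the raw page contents. LoadFrom
	// consumes segments in the same order.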
	for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
		if !seg.Value().knownCommitted {
			continue
		}
		// Write a header to distinguish from objects.
		if err := state.WriteHeader(w, uint64(seg.Range().Length()), false); err != nil {
			return err
		}
		// Write out data.
		var ioErr error
		err := f.forEachMappingSlice(seg.Range(), func(s []byte) {
			if ioErr != nil {
				return
			}
			_, ioErr = w.Write(s)
		})
		if ioErr != nil {
			return ioErr
		}
		if err != nil {
			return err
		}
	}

	return nil
}

// MarkSavable marks f as savable.
func (f *MemoryFile) MarkSavable() {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.savable = true
}

// IsSavable returns true if f is savable.
func (f *MemoryFile) IsSavable() bool {
	f.mu.Lock()
	defer f.mu.Unlock()
	return f.savable
}

// RestoreID returns the restore ID for f.
func (f *MemoryFile) RestoreID() string {
	return f.opts.RestoreID
}

// LoadFrom loads MemoryFile state from the given stream.
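// The stream must have been produced by SaveTo: metadata (the file size and
// the usage set) is loaded first, then the contents of each knownCommitted
// segment in the order in which SaveTo wrote them.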
func (f *MemoryFile) LoadFrom(ctx context.Context, r wire.Reader) error {
	// Load metadata.
	if _, err := state.Load(ctx, r, &f.fileSize); err != nil {
		return err
	}
	if err := f.file.Truncate(f.fileSize); err != nil {
		return err
	}
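	// Size the mapping cache to hold one entry per chunk of the restored
	// file; chunks are mapped lazily and cached in f.mappings as they are
	// touched.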
	newMappings := make([]uintptr, f.fileSize>>chunkShift)
	f.mappings.Store(&newMappings)
	if _, err := state.Load(ctx, r, &f.usage); err != nil {
		return err
	}

	// Try to map committed chunks concurrently: For any given chunk, either
	// this loop or the following one will mmap the chunk first and cache it in
	// f.mappings for the other, but this loop is likely to run ahead of the
	// other since it doesn't do any work between mmaps. The rest of this
	// function doesn't mutate f.usage, so it's safe to iterate concurrently.
	mapperDone := make(chan struct{})
	mapperCanceled := atomicbitops.FromInt32(0)
	go func() { // S/R-SAFE: see comment
		defer func() { close(mapperDone) }()
		for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
			if mapperCanceled.Load() != 0 {
				return
			}
			if seg.Value().knownCommitted {
				f.forEachMappingSlice(seg.Range(), func(s []byte) {})
			}
		}
	}()
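	// Stop the mapper goroutine and wait for it to exit before returning, on
	// both the success and error paths.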
	defer func() {
		mapperCanceled.Store(1)
		<-mapperDone
	}()

	// Load committed pages.
	for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
		if !seg.Value().knownCommitted {
			continue
		}
		// Verify header.
		length, object, err := state.ReadHeader(r)
		if err != nil {
			return err
		}
		if object {
			// Not expected.
			return fmt.Errorf("unexpected object")
		}
		if expected := uint64(seg.Range().Length()); length != expected {
			// Size mismatch.
			return fmt.Errorf("mismatched segment: expected %d, got %d", expected, length)
		}
		// Read data.
		var ioErr error
		err = f.forEachMappingSlice(seg.Range(), func(s []byte) {
			if ioErr != nil {
				return
			}
			_, ioErr = io.ReadFull(r, s)
		})
		if ioErr != nil {
			return ioErr
		}
		if err != nil {
			return err
		}

		// Update accounting for restored pages. We need to do this here since
		// these segments are marked as "known committed", and will be skipped
		// over on accounting scans.
		usage.MemoryAccounting.Inc(seg.End()-seg.Start(), seg.Value().kind, seg.Value().memCgID)
	}

	return nil
}