github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/pgalloc/save_restore.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package pgalloc 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "io" 22 "runtime" 23 24 "github.com/MerlinKodo/gvisor/pkg/atomicbitops" 25 "github.com/MerlinKodo/gvisor/pkg/hostarch" 26 "github.com/MerlinKodo/gvisor/pkg/log" 27 "github.com/MerlinKodo/gvisor/pkg/sentry/usage" 28 "github.com/MerlinKodo/gvisor/pkg/state" 29 "github.com/MerlinKodo/gvisor/pkg/state/wire" 30 "golang.org/x/sys/unix" 31 ) 32 33 // SaveTo writes f's state to the given stream. 34 func (f *MemoryFile) SaveTo(ctx context.Context, w wire.Writer) error { 35 // Wait for reclaim. 36 f.mu.Lock() 37 defer f.mu.Unlock() 38 for f.reclaimable { 39 f.reclaimCond.Signal() 40 f.mu.Unlock() 41 runtime.Gosched() 42 f.mu.Lock() 43 } 44 45 // Ensure that there are no pending evictions. 46 if len(f.evictable) != 0 { 47 panic(fmt.Sprintf("evictions still pending for %d users; call StartEvictions and WaitForEvictions before SaveTo", len(f.evictable))) 48 } 49 50 // Ensure that all pages that contain data have knownCommitted set, since 51 // we only store knownCommitted pages below. 52 zeroPage := make([]byte, hostarch.PageSize) 53 err := f.updateUsageLocked(0, 0, func(bs []byte, committed []byte) error { 54 for pgoff := 0; pgoff < len(bs); pgoff += hostarch.PageSize { 55 i := pgoff / hostarch.PageSize 56 pg := bs[pgoff : pgoff+hostarch.PageSize] 57 if !bytes.Equal(pg, zeroPage) { 58 committed[i] = 1 59 continue 60 } 61 committed[i] = 0 62 // Reading the page caused it to be committed; decommit it to 63 // reduce memory usage. 64 // 65 // "MADV_REMOVE [...] Free up a given range of pages and its 66 // associated backing store. This is equivalent to punching a hole 67 // in the corresponding byte range of the backing store (see 68 // fallocate(2))." - madvise(2) 69 if err := unix.Madvise(pg, unix.MADV_REMOVE); err != nil { 70 // This doesn't impact the correctness of saved memory, it 71 // just means that we're incrementally more likely to OOM. 72 // Complain, but don't abort saving. 73 log.Warningf("Decommitting page %p while saving failed: %v", pg, err) 74 } 75 } 76 return nil 77 }) 78 if err != nil { 79 return err 80 } 81 82 // Save metadata. 83 if _, err := state.Save(ctx, w, &f.fileSize); err != nil { 84 return err 85 } 86 if _, err := state.Save(ctx, w, &f.usage); err != nil { 87 return err 88 } 89 90 // Dump out committed pages. 91 for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { 92 if !seg.Value().knownCommitted { 93 continue 94 } 95 // Write a header to distinguish from objects. 96 if err := state.WriteHeader(w, uint64(seg.Range().Length()), false); err != nil { 97 return err 98 } 99 // Write out data. 100 var ioErr error 101 err := f.forEachMappingSlice(seg.Range(), func(s []byte) { 102 if ioErr != nil { 103 return 104 } 105 _, ioErr = w.Write(s) 106 }) 107 if ioErr != nil { 108 return ioErr 109 } 110 if err != nil { 111 return err 112 } 113 } 114 115 return nil 116 } 117 118 // LoadFrom loads MemoryFile state from the given stream. 119 func (f *MemoryFile) LoadFrom(ctx context.Context, r wire.Reader) error { 120 // Load metadata. 121 if _, err := state.Load(ctx, r, &f.fileSize); err != nil { 122 return err 123 } 124 if err := f.file.Truncate(f.fileSize); err != nil { 125 return err 126 } 127 newMappings := make([]uintptr, f.fileSize>>chunkShift) 128 f.mappings.Store(newMappings) 129 if _, err := state.Load(ctx, r, &f.usage); err != nil { 130 return err 131 } 132 133 // Try to map committed chunks concurrently: For any given chunk, either 134 // this loop or the following one will mmap the chunk first and cache it in 135 // f.mappings for the other, but this loop is likely to run ahead of the 136 // other since it doesn't do any work between mmaps. The rest of this 137 // function doesn't mutate f.usage, so it's safe to iterate concurrently. 138 mapperDone := make(chan struct{}) 139 mapperCanceled := atomicbitops.FromInt32(0) 140 go func() { // S/R-SAFE: see comment 141 defer func() { close(mapperDone) }() 142 for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { 143 if mapperCanceled.Load() != 0 { 144 return 145 } 146 if seg.Value().knownCommitted { 147 f.forEachMappingSlice(seg.Range(), func(s []byte) {}) 148 } 149 } 150 }() 151 defer func() { 152 mapperCanceled.Store(1) 153 <-mapperDone 154 }() 155 156 // Load committed pages. 157 for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { 158 if !seg.Value().knownCommitted { 159 continue 160 } 161 // Verify header. 162 length, object, err := state.ReadHeader(r) 163 if err != nil { 164 return err 165 } 166 if object { 167 // Not expected. 168 return fmt.Errorf("unexpected object") 169 } 170 if expected := uint64(seg.Range().Length()); length != expected { 171 // Size mismatch. 172 return fmt.Errorf("mismatched segment: expected %d, got %d", expected, length) 173 } 174 // Read data. 175 var ioErr error 176 err = f.forEachMappingSlice(seg.Range(), func(s []byte) { 177 if ioErr != nil { 178 return 179 } 180 _, ioErr = io.ReadFull(r, s) 181 }) 182 if ioErr != nil { 183 return ioErr 184 } 185 if err != nil { 186 return err 187 } 188 189 // Update accounting for restored pages. We need to do this here since 190 // these segments are marked as "known committed", and will be skipped 191 // over on accounting scans. 192 usage.MemoryAccounting.Inc(seg.End()-seg.Start(), seg.Value().kind, seg.Value().memCgID) 193 } 194 195 return nil 196 } 197 198 // MemoryFileProvider provides the MemoryFile method. 199 // 200 // This type exists to work around a save/restore defect. The only object in a 201 // saved object graph that S/R allows to be replaced at time of restore is the 202 // starting point of the restore, kernel.Kernel. However, the MemoryFile 203 // changes between save and restore as well, so objects that need persistent 204 // access to the MemoryFile must instead store a pointer to the Kernel and call 205 // Kernel.MemoryFile() as required. In most cases, depending on the kernel 206 // package directly would create a package dependency loop, so the stored 207 // pointer must instead be a MemoryProvider interface object. Correspondingly, 208 // kernel.Kernel is the only implementation of this interface. 209 type MemoryFileProvider interface { 210 // MemoryFile returns the Kernel MemoryFile. 211 MemoryFile() *MemoryFile 212 }