github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/pgalloc/save_restore.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pgalloc

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"runtime"

	"golang.org/x/sys/unix"
	"github.com/metacubex/gvisor/pkg/atomicbitops"
	"github.com/metacubex/gvisor/pkg/hostarch"
	"github.com/metacubex/gvisor/pkg/log"
	"github.com/metacubex/gvisor/pkg/sentry/usage"
	"github.com/metacubex/gvisor/pkg/state"
	"github.com/metacubex/gvisor/pkg/state/wire"
)

// SaveTo writes f's state to the given stream.
func (f *MemoryFile) SaveTo(ctx context.Context, w wire.Writer) error {
	// Wait for reclaim.
	f.mu.Lock()
	defer f.mu.Unlock()
	for f.reclaimable {
		f.reclaimCond.Signal()
		f.mu.Unlock()
		runtime.Gosched()
		f.mu.Lock()
	}

	// Ensure that there are no pending evictions.
	if len(f.evictable) != 0 {
		panic(fmt.Sprintf("evictions still pending for %d users; call StartEvictions and WaitForEvictions before SaveTo", len(f.evictable)))
	}

	// Ensure that all pages that contain data have knownCommitted set, since
	// we only store knownCommitted pages below.
	zeroPage := make([]byte, hostarch.PageSize)
	err := f.updateUsageLocked(0, nil, func(bs []byte, committed []byte) error {
		for pgoff := 0; pgoff < len(bs); pgoff += hostarch.PageSize {
			i := pgoff / hostarch.PageSize
			pg := bs[pgoff : pgoff+hostarch.PageSize]
			if !bytes.Equal(pg, zeroPage) {
				committed[i] = 1
				continue
			}
			committed[i] = 0
			// Reading the page caused it to be committed; decommit it to
			// reduce memory usage.
			//
			// "MADV_REMOVE [...] Free up a given range of pages and its
			// associated backing store. This is equivalent to punching a hole
			// in the corresponding byte range of the backing store (see
			// fallocate(2))." - madvise(2)
			if err := unix.Madvise(pg, unix.MADV_REMOVE); err != nil {
				// This doesn't impact the correctness of saved memory; it
				// just means that we're incrementally more likely to OOM.
				// Complain, but don't abort saving.
				log.Warningf("Decommitting page %p while saving failed: %v", pg, err)
			}
		}
		return nil
	})
	if err != nil {
		return err
	}

	// Save metadata.
	if _, err := state.Save(ctx, w, &f.fileSize); err != nil {
		return err
	}
	if _, err := state.Save(ctx, w, &f.usage); err != nil {
		return err
	}

	// Dump out committed pages.
	for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
		if !seg.Value().knownCommitted {
			continue
		}
		// Write a header to distinguish from objects.
		if err := state.WriteHeader(w, uint64(seg.Range().Length()), false); err != nil {
			return err
		}
		// Write out data.
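		// Note: forEachMappingSlice's callback cannot return an error, so the
		// writer's error is captured in ioErr and checked alongside the
		// mapping error below.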
		var ioErr error
		err := f.forEachMappingSlice(seg.Range(), func(s []byte) {
			if ioErr != nil {
				return
			}
			_, ioErr = w.Write(s)
		})
		if ioErr != nil {
			return ioErr
		}
		if err != nil {
			return err
		}
	}

	return nil
}

// MarkSavable marks f as savable.
func (f *MemoryFile) MarkSavable() {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.savable = true
}

// IsSavable returns true if f is savable.
func (f *MemoryFile) IsSavable() bool {
	f.mu.Lock()
	defer f.mu.Unlock()
	return f.savable
}

// RestoreID returns the restore ID for f.
func (f *MemoryFile) RestoreID() string {
	return f.opts.RestoreID
}

// LoadFrom loads MemoryFile state from the given stream.
func (f *MemoryFile) LoadFrom(ctx context.Context, r wire.Reader) error {
	// Load metadata.
	if _, err := state.Load(ctx, r, &f.fileSize); err != nil {
		return err
	}
	if err := f.file.Truncate(f.fileSize); err != nil {
		return err
	}
	newMappings := make([]uintptr, f.fileSize>>chunkShift)
	f.mappings.Store(&newMappings)
	if _, err := state.Load(ctx, r, &f.usage); err != nil {
		return err
	}

	// Try to map committed chunks concurrently: For any given chunk, either
	// this loop or the following one will mmap the chunk first and cache it in
	// f.mappings for the other, but this loop is likely to run ahead of the
	// other since it doesn't do any work between mmaps. The rest of this
	// function doesn't mutate f.usage, so it's safe to iterate concurrently.
	mapperDone := make(chan struct{})
	mapperCanceled := atomicbitops.FromInt32(0)
	go func() { // S/R-SAFE: see comment
		defer func() { close(mapperDone) }()
		for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
			if mapperCanceled.Load() != 0 {
				return
			}
			if seg.Value().knownCommitted {
				f.forEachMappingSlice(seg.Range(), func(s []byte) {})
			}
		}
	}()
	defer func() {
		mapperCanceled.Store(1)
		<-mapperDone
	}()

	// Load committed pages.
	for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
		if !seg.Value().knownCommitted {
			continue
		}
		// Verify header.
		length, object, err := state.ReadHeader(r)
		if err != nil {
			return err
		}
		if object {
			// Not expected.
			return fmt.Errorf("unexpected object")
		}
		if expected := uint64(seg.Range().Length()); length != expected {
			// Size mismatch.
			return fmt.Errorf("mismatched segment: expected %d, got %d", expected, length)
		}
		// Read data. As in SaveTo, the reader's error is captured in ioErr
		// because the callback cannot return one.
		var ioErr error
		err = f.forEachMappingSlice(seg.Range(), func(s []byte) {
			if ioErr != nil {
				return
			}
			_, ioErr = io.ReadFull(r, s)
		})
		if ioErr != nil {
			return ioErr
		}
		if err != nil {
			return err
		}

		// Update accounting for restored pages. We need to do this here since
		// these segments are marked as "known committed", and will be skipped
		// over on accounting scans.
		usage.MemoryAccounting.Inc(seg.End()-seg.Start(), seg.Value().kind, seg.Value().memCgID)
	}

	return nil
}
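
For orientation, here is a minimal sketch of how a caller might drive these entry points, following the ordering that SaveTo's panic message prescribes (StartEvictions and WaitForEvictions before SaveTo, both assumed here to take no arguments). The package name mfutil and the helper names checkpoint and restore are illustrative, not part of gVisor; only the MemoryFile methods shown above come from the package itself.

// Package mfutil is a hypothetical wrapper used only for illustration.
package mfutil

import (
	"context"

	"github.com/metacubex/gvisor/pkg/sentry/pgalloc"
	"github.com/metacubex/gvisor/pkg/state/wire"
)

// checkpoint drains pending evictions (SaveTo panics if any remain), marks f
// savable, and streams its state to w.
func checkpoint(ctx context.Context, f *pgalloc.MemoryFile, w wire.Writer) error {
	f.StartEvictions()
	f.WaitForEvictions()
	f.MarkSavable()
	return f.SaveTo(ctx, w)
}

// restore reloads state previously written by checkpoint into f, which is
// assumed to be freshly created: LoadFrom truncates f's backing file and
// rebuilds its mappings and usage set from the stream.
func restore(ctx context.Context, f *pgalloc.MemoryFile, r wire.Reader) error {
	return f.LoadFrom(ctx, r)
}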