github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/runsc/boot/restore.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package boot 16 17 import ( 18 "fmt" 19 "os" 20 21 "github.com/metacubex/gvisor/pkg/context" 22 "github.com/metacubex/gvisor/pkg/sentry/fsimpl/host" 23 "github.com/metacubex/gvisor/pkg/sentry/inet" 24 "github.com/metacubex/gvisor/pkg/sentry/kernel" 25 "github.com/metacubex/gvisor/pkg/sentry/socket/hostinet" 26 "github.com/metacubex/gvisor/pkg/sentry/socket/netstack" 27 "github.com/metacubex/gvisor/pkg/sentry/state" 28 "github.com/metacubex/gvisor/pkg/sentry/time" 29 "github.com/metacubex/gvisor/pkg/sentry/vfs" 30 "github.com/metacubex/gvisor/pkg/sentry/watchdog" 31 "github.com/metacubex/gvisor/pkg/tcpip/stack" 32 "github.com/metacubex/gvisor/runsc/boot/pprof" 33 ) 34 35 type restorer struct { 36 container *containerInfo 37 stateFile *os.File 38 deviceFile *os.File 39 } 40 41 func createNetworkNamespaceForRestore(l *Loader) (*stack.Stack, *inet.Namespace, error) { 42 creds := getRootCredentials(l.root.spec, l.root.conf, nil /* UserNamespace */) 43 if creds == nil { 44 return nil, nil, fmt.Errorf("getting root credentials") 45 } 46 47 // Save the current network stack to slap on top of the one that was restored. 48 curNetwork := l.k.RootNetworkNamespace().Stack() 49 eps, ok := curNetwork.(*netstack.Stack) 50 if !ok { 51 return nil, inet.NewRootNamespace(hostinet.NewStack(), nil, creds.UserNamespace), nil 52 } 53 54 creator := &sandboxNetstackCreator{ 55 clock: l.k.Timekeeper(), 56 uniqueID: l.k, 57 allowPacketEndpointWrite: l.root.conf.AllowPacketEndpointWrite, 58 } 59 return eps.Stack, inet.NewRootNamespace(curNetwork, creator, creds.UserNamespace), nil 60 } 61 62 func (r *restorer) restore(l *Loader) error { 63 // Create a new root network namespace with the network stack of the 64 // old kernel to preserve the existing network configuration. 65 oldStack, netns, err := createNetworkNamespaceForRestore(l) 66 if err != nil { 67 return fmt.Errorf("creating network: %w", err) 68 } 69 70 // Reset the network stack in the network namespace to nil before 71 // replacing the kernel. This will not free the network stack when this 72 // old kernel is released. 73 l.k.RootNetworkNamespace().ResetStack() 74 75 p, err := createPlatform(l.root.conf, r.deviceFile) 76 if err != nil { 77 return fmt.Errorf("creating platform: %v", err) 78 } 79 80 // Start the old watchdog before replacing it with a new one below. 81 l.watchdog.Start() 82 83 // Release the kernel and replace it with a new one that will be restored into. 84 if l.k != nil { 85 l.k.Release() 86 } 87 l.k = &kernel.Kernel{ 88 Platform: p, 89 } 90 91 mf, err := createMemoryFile() 92 if err != nil { 93 return fmt.Errorf("creating memory file: %v", err) 94 } 95 l.k.SetMemoryFile(mf) 96 97 if l.root.conf.ProfileEnable { 98 // pprof.Initialize opens /proc/self/maps, so has to be called before 99 // installing seccomp filters. 100 pprof.Initialize() 101 } 102 103 // Seccomp filters have to be applied before vfs restore and before parsing 104 // the state file. 105 if err := l.installSeccompFilters(); err != nil { 106 return err 107 } 108 109 // Set up the restore environment. 110 ctx := l.k.SupervisorContext() 111 if oldStack != nil { 112 ctx = context.WithValue(ctx, stack.CtxRestoreStack, oldStack) 113 } 114 115 // TODO(b/298078576): Need to process hints here probably 116 mntr := newContainerMounter(&l.root, l.k, l.mountHints, l.sharedMounts, l.productName, l.sandboxID) 117 ctx, err = mntr.configureRestore(ctx) 118 if err != nil { 119 return fmt.Errorf("configuring filesystem restore: %v", err) 120 } 121 122 fdmap := vfs.RestoreFilesystemFDMapFromContext(ctx) 123 for appFD, fd := range r.container.stdioFDs { 124 key := host.MakeRestoreID(r.container.containerName, appFD) 125 fdmap[key] = fd.Release() 126 } 127 for _, customFD := range r.container.passFDs { 128 key := host.MakeRestoreID(r.container.containerName, customFD.guest) 129 fdmap[key] = customFD.host.FD() 130 } 131 132 // Load the state. 133 loadOpts := state.LoadOpts{Source: r.stateFile} 134 if err := loadOpts.Load(ctx, l.k, nil, netns.Stack(), time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { 135 return err 136 } 137 138 // Since we have a new kernel we also must make a new watchdog. 139 dogOpts := watchdog.DefaultOpts 140 dogOpts.TaskTimeoutAction = l.root.conf.WatchdogAction 141 dog := watchdog.New(l.k, dogOpts) 142 143 // Change the loader fields to reflect the changes made when restoring. 144 l.watchdog = dog 145 l.root.procArgs = kernel.CreateProcessArgs{} 146 l.restore = true 147 148 // Reinitialize the sandbox ID and processes map. Note that it doesn't 149 // restore the state of multiple containers, nor exec processes. 150 l.sandboxID = r.container.cid 151 152 l.mu.Lock() 153 defer l.mu.Unlock() 154 155 // Set new container ID if it has changed. 156 tasks := l.k.TaskSet().Root.Tasks() 157 if tasks[0].ContainerID() != l.sandboxID { // There must be at least 1 task. 158 for _, task := range tasks { 159 task.RestoreContainerID(l.sandboxID) 160 } 161 } 162 163 eid := execID{cid: l.sandboxID} 164 l.processes = map[execID]*execProcess{ 165 eid: { 166 tg: l.k.GlobalInit(), 167 }, 168 } 169 170 return nil 171 }