// Origin: github.com/rish1988/moby@v25.0.2+incompatible/daemon/containerfs_linux.go

package daemon // import "github.com/docker/docker/daemon"

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"strings"

	"github.com/containerd/log"
	"github.com/hashicorp/go-multierror"
	"github.com/moby/sys/mount"
	"github.com/moby/sys/symlink"
	"golang.org/x/sys/unix"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/container"
	"github.com/docker/docker/internal/compatcontext"
	"github.com/docker/docker/internal/mounttree"
	"github.com/docker/docker/internal/unshare"
	"github.com/docker/docker/pkg/fileutils"
)

// future is one unit of work handed to the goroutine that services a
// containerFSView.
type future struct {
	// fn is the function to execute inside the container filesystem view.
	fn func() error
	// res, when non-nil, is sent the value returned by fn. When nil, the
	// return value of fn is dropped.
	res chan<- error
}

// containerFSView allows functions to be run in the context of a container's
// filesystem. Inside these functions, the root directory is the container root
// for all native OS filesystem APIs, including, but not limited to, the [os]
// and [golang.org/x/sys/unix] packages. The view of the container's filesystem
// is live and read-write. Each view has its own private set of tmpfs mounts.
// Any files written under a tmpfs mount are not visible to processes inside the
// container nor any other view of the container's filesystem, and vice versa.
//
// Each view has its own current working directory which is initialized to the
// root of the container filesystem and can be changed with [os.Chdir]. Changes
// to the current directory persist across successive [*containerFSView.RunInFS]
// and [*containerFSView.GoInFS] calls.
//
// Multiple views of the same container filesystem can coexist at the same time.
// Only one function can be running in a particular filesystem view at any given
// time. Calls to [*containerFSView.RunInFS] or [*containerFSView.GoInFS] will
// block while another function is running.
If more than one call is blocked 48 // concurrently, the order they are unblocked is undefined. 49 type containerFSView struct { 50 d *Daemon 51 ctr *container.Container 52 todo chan future 53 done chan error 54 } 55 56 // openContainerFS opens a new view of the container's filesystem. 57 func (daemon *Daemon) openContainerFS(container *container.Container) (_ *containerFSView, err error) { 58 ctx := context.TODO() 59 60 if err := daemon.Mount(container); err != nil { 61 return nil, err 62 } 63 defer func() { 64 if err != nil { 65 _ = daemon.Unmount(container) 66 } 67 }() 68 69 mounts, cleanup, err := daemon.setupMounts(ctx, container) 70 if err != nil { 71 return nil, err 72 } 73 defer func() { 74 ctx := compatcontext.WithoutCancel(ctx) 75 cleanup(ctx) 76 if err != nil { 77 _ = container.UnmountVolumes(ctx, daemon.LogVolumeEvent) 78 } 79 }() 80 81 // Setup in initial mount namespace complete. We're ready to unshare the 82 // mount namespace and bind the volume mounts into that private view of 83 // the container FS. 
84 todo := make(chan future) 85 done := make(chan error) 86 err = unshare.Go(unix.CLONE_NEWNS, 87 func() error { 88 if err := mount.MakeRSlave("/"); err != nil { 89 return err 90 } 91 for _, m := range mounts { 92 dest, err := container.GetResourcePath(m.Destination) 93 if err != nil { 94 return err 95 } 96 97 var stat os.FileInfo 98 stat, err = os.Stat(m.Source) 99 if err != nil { 100 return err 101 } 102 if err := fileutils.CreateIfNotExists(dest, stat.IsDir()); err != nil { 103 return err 104 } 105 106 bindMode := "rbind" 107 if m.NonRecursive { 108 bindMode = "bind" 109 } 110 writeMode := "ro" 111 if m.Writable { 112 writeMode = "rw" 113 if m.ReadOnlyNonRecursive { 114 return errors.New("options conflict: Writable && ReadOnlyNonRecursive") 115 } 116 if m.ReadOnlyForceRecursive { 117 return errors.New("options conflict: Writable && ReadOnlyForceRecursive") 118 } 119 } 120 if m.ReadOnlyNonRecursive && m.ReadOnlyForceRecursive { 121 return errors.New("options conflict: ReadOnlyNonRecursive && ReadOnlyForceRecursive") 122 } 123 124 // openContainerFS() is called for temporary mounts 125 // outside the container. Soon these will be unmounted 126 // with lazy unmount option and given we have mounted 127 // them rbind, all the submounts will propagate if these 128 // are shared. If daemon is running in host namespace 129 // and has / as shared then these unmounts will 130 // propagate and unmount original mount as well. So make 131 // all these mounts rprivate. Do not use propagation 132 // property of volume as that should apply only when 133 // mounting happens inside the container. 
134 opts := strings.Join([]string{bindMode, writeMode, "rprivate"}, ",") 135 if err := mount.Mount(m.Source, dest, "", opts); err != nil { 136 return err 137 } 138 139 if !m.Writable && !m.ReadOnlyNonRecursive { 140 if err := makeMountRRO(dest); err != nil { 141 if m.ReadOnlyForceRecursive { 142 return err 143 } else { 144 log.G(context.TODO()).WithError(err).Debugf("Failed to make %q recursively read-only", dest) 145 } 146 } 147 } 148 } 149 150 return mounttree.SwitchRoot(container.BaseFS) 151 }, 152 func() { 153 defer close(done) 154 155 for it := range todo { 156 err := it.fn() 157 if it.res != nil { 158 it.res <- err 159 } 160 } 161 162 // The thread will terminate when this goroutine returns, taking the 163 // mount namespace and all the volume bind-mounts with it. 164 }, 165 ) 166 if err != nil { 167 return nil, err 168 } 169 vw := &containerFSView{ 170 d: daemon, 171 ctr: container, 172 todo: todo, 173 done: done, 174 } 175 runtime.SetFinalizer(vw, (*containerFSView).Close) 176 return vw, nil 177 } 178 179 // RunInFS synchronously runs fn in the context of the container filesytem and 180 // passes through its return value. 181 // 182 // The container filesystem is only visible to functions called in the same 183 // goroutine as fn. Goroutines started from fn will see the host's filesystem. 184 func (vw *containerFSView) RunInFS(ctx context.Context, fn func() error) error { 185 res := make(chan error) 186 select { 187 case vw.todo <- future{fn: fn, res: res}: 188 case <-ctx.Done(): 189 return ctx.Err() 190 } 191 return <-res 192 } 193 194 // GoInFS starts fn in the container FS. It blocks until fn is started but does 195 // not wait until fn returns. An error is returned if ctx is canceled before fn 196 // has been started. 197 // 198 // The container filesystem is only visible to functions called in the same 199 // goroutine as fn. Goroutines started from fn will see the host's filesystem. 
200 func (vw *containerFSView) GoInFS(ctx context.Context, fn func()) error { 201 select { 202 case vw.todo <- future{fn: func() error { fn(); return nil }}: 203 return nil 204 case <-ctx.Done(): 205 return ctx.Err() 206 } 207 } 208 209 // Close waits until any in-flight operations complete and frees all 210 // resources associated with vw. 211 func (vw *containerFSView) Close() error { 212 runtime.SetFinalizer(vw, nil) 213 close(vw.todo) 214 err := multierror.Append(nil, <-vw.done) 215 err = multierror.Append(err, vw.ctr.UnmountVolumes(context.TODO(), vw.d.LogVolumeEvent)) 216 err = multierror.Append(err, vw.d.Unmount(vw.ctr)) 217 return err.ErrorOrNil() 218 } 219 220 // Stat returns the metadata for path, relative to the current working directory 221 // of vw inside the container filesystem view. 222 func (vw *containerFSView) Stat(ctx context.Context, path string) (*types.ContainerPathStat, error) { 223 var stat *types.ContainerPathStat 224 err := vw.RunInFS(ctx, func() error { 225 lstat, err := os.Lstat(path) 226 if err != nil { 227 return err 228 } 229 var target string 230 if lstat.Mode()&os.ModeSymlink != 0 { 231 // Fully evaluate symlinks along path to the ultimate 232 // target, or as much as possible with broken links. 233 target, err = symlink.FollowSymlinkInScope(path, "/") 234 if err != nil { 235 return err 236 } 237 } 238 stat = &types.ContainerPathStat{ 239 Name: filepath.Base(path), 240 Size: lstat.Size(), 241 Mode: lstat.Mode(), 242 Mtime: lstat.ModTime(), 243 LinkTarget: target, 244 } 245 return nil 246 }) 247 return stat, err 248 } 249 250 // makeMountRRO makes the mount recursively read-only. 
251 func makeMountRRO(dest string) error { 252 attr := &unix.MountAttr{ 253 Attr_set: unix.MOUNT_ATTR_RDONLY, 254 } 255 var err error 256 for { 257 err = unix.MountSetattr(-1, dest, unix.AT_RECURSIVE, attr) 258 if !errors.Is(err, unix.EINTR) { 259 break 260 } 261 } 262 if err != nil { 263 err = fmt.Errorf("failed to apply MOUNT_ATTR_RDONLY with AT_RECURSIVE to %q: %w", dest, err) 264 } 265 return err 266 }