github.com/containerd/containerd@v22.0.0-20200918172823-438c87b8e050+incompatible/mount/mount_linux.go (about) 1 /* 2 Copyright The containerd Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package mount 18 19 import ( 20 "fmt" 21 "os" 22 "os/exec" 23 "path" 24 "strings" 25 "time" 26 27 "github.com/containerd/containerd/sys" 28 "github.com/pkg/errors" 29 "golang.org/x/sys/unix" 30 ) 31 32 var ( 33 pagesize = 4096 34 allowedHelperBinaries = []string{"mount.fuse", "mount.fuse3"} 35 ) 36 37 func init() { 38 pagesize = os.Getpagesize() 39 } 40 41 // Mount to the provided target path. 42 // 43 // If m.Type starts with "fuse." or "fuse3.", "mount.fuse" or "mount.fuse3" 44 // helper binary is called. 45 func (m *Mount) Mount(target string) error { 46 for _, helperBinary := range allowedHelperBinaries { 47 // helperBinary = "mount.fuse", typePrefix = "fuse." 48 typePrefix := strings.TrimPrefix(helperBinary, "mount.") + "." 49 if strings.HasPrefix(m.Type, typePrefix) { 50 return m.mountWithHelper(helperBinary, typePrefix, target) 51 } 52 } 53 var ( 54 chdir string 55 options = m.Options 56 ) 57 58 // avoid hitting one page limit of mount argument buffer 59 // 60 // NOTE: 512 is a buffer during pagesize check. 
61 if m.Type == "overlay" && optionsSize(options) >= pagesize-512 { 62 chdir, options = compactLowerdirOption(options) 63 } 64 65 flags, data := parseMountOptions(options) 66 if len(data) > pagesize { 67 return errors.Errorf("mount options is too long") 68 } 69 70 // propagation types. 71 const ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE 72 73 // Ensure propagation type change flags aren't included in other calls. 74 oflags := flags &^ ptypes 75 76 // In the case of remounting with changed data (data != ""), need to call mount (moby/moby#34077). 77 if flags&unix.MS_REMOUNT == 0 || data != "" { 78 // Initial call applying all non-propagation flags for mount 79 // or remount with changed data 80 if err := mountAt(chdir, m.Source, target, m.Type, uintptr(oflags), data); err != nil { 81 return err 82 } 83 } 84 85 if flags&ptypes != 0 { 86 // Change the propagation type. 87 const pflags = ptypes | unix.MS_REC | unix.MS_SILENT 88 if err := unix.Mount("", target, "", uintptr(flags&pflags), ""); err != nil { 89 return err 90 } 91 } 92 93 const broflags = unix.MS_BIND | unix.MS_RDONLY 94 if oflags&broflags == broflags { 95 // Remount the bind to apply read only. 
96 return unix.Mount("", target, "", uintptr(oflags|unix.MS_REMOUNT), "") 97 } 98 return nil 99 } 100 101 // Unmount the provided mount path with the flags 102 func Unmount(target string, flags int) error { 103 if err := unmount(target, flags); err != nil && err != unix.EINVAL { 104 return err 105 } 106 return nil 107 } 108 109 func isFUSE(dir string) (bool, error) { 110 // fuseSuperMagic is defined in statfs(2) 111 const fuseSuperMagic = 0x65735546 112 var st unix.Statfs_t 113 if err := unix.Statfs(dir, &st); err != nil { 114 return false, err 115 } 116 return st.Type == fuseSuperMagic, nil 117 } 118 119 func unmount(target string, flags int) error { 120 // For FUSE mounts, attempting to execute fusermount helper binary is preferred 121 // https://github.com/containerd/containerd/pull/3765#discussion_r342083514 122 if ok, err := isFUSE(target); err == nil && ok { 123 for _, helperBinary := range []string{"fusermount3", "fusermount"} { 124 cmd := exec.Command(helperBinary, "-u", target) 125 if err := cmd.Run(); err == nil { 126 return nil 127 } 128 // ignore error and try unix.Unmount 129 } 130 } 131 for i := 0; i < 50; i++ { 132 if err := unix.Unmount(target, flags); err != nil { 133 switch err { 134 case unix.EBUSY: 135 time.Sleep(50 * time.Millisecond) 136 continue 137 default: 138 return err 139 } 140 } 141 return nil 142 } 143 return errors.Wrapf(unix.EBUSY, "failed to unmount target %s", target) 144 } 145 146 // UnmountAll repeatedly unmounts the given mount point until there 147 // are no mounts remaining (EINVAL is returned by mount), which is 148 // useful for undoing a stack of mounts on the same mount point. 149 // UnmountAll all is noop when the first argument is an empty string. 150 // This is done when the containerd client did not specify any rootfs 151 // mounts (e.g. because the rootfs is managed outside containerd) 152 // UnmountAll is noop when the mount path does not exist. 
153 func UnmountAll(mount string, flags int) error { 154 if mount == "" { 155 return nil 156 } 157 if _, err := os.Stat(mount); os.IsNotExist(err) { 158 return nil 159 } 160 161 for { 162 if err := unmount(mount, flags); err != nil { 163 // EINVAL is returned if the target is not a 164 // mount point, indicating that we are 165 // done. It can also indicate a few other 166 // things (such as invalid flags) which we 167 // unfortunately end up squelching here too. 168 if err == unix.EINVAL { 169 return nil 170 } 171 return err 172 } 173 } 174 } 175 176 // parseMountOptions takes fstab style mount options and parses them for 177 // use with a standard mount() syscall 178 func parseMountOptions(options []string) (int, string) { 179 var ( 180 flag int 181 data []string 182 ) 183 flags := map[string]struct { 184 clear bool 185 flag int 186 }{ 187 "async": {true, unix.MS_SYNCHRONOUS}, 188 "atime": {true, unix.MS_NOATIME}, 189 "bind": {false, unix.MS_BIND}, 190 "defaults": {false, 0}, 191 "dev": {true, unix.MS_NODEV}, 192 "diratime": {true, unix.MS_NODIRATIME}, 193 "dirsync": {false, unix.MS_DIRSYNC}, 194 "exec": {true, unix.MS_NOEXEC}, 195 "mand": {false, unix.MS_MANDLOCK}, 196 "noatime": {false, unix.MS_NOATIME}, 197 "nodev": {false, unix.MS_NODEV}, 198 "nodiratime": {false, unix.MS_NODIRATIME}, 199 "noexec": {false, unix.MS_NOEXEC}, 200 "nomand": {true, unix.MS_MANDLOCK}, 201 "norelatime": {true, unix.MS_RELATIME}, 202 "nostrictatime": {true, unix.MS_STRICTATIME}, 203 "nosuid": {false, unix.MS_NOSUID}, 204 "rbind": {false, unix.MS_BIND | unix.MS_REC}, 205 "relatime": {false, unix.MS_RELATIME}, 206 "remount": {false, unix.MS_REMOUNT}, 207 "ro": {false, unix.MS_RDONLY}, 208 "rw": {true, unix.MS_RDONLY}, 209 "strictatime": {false, unix.MS_STRICTATIME}, 210 "suid": {true, unix.MS_NOSUID}, 211 "sync": {false, unix.MS_SYNCHRONOUS}, 212 } 213 for _, o := range options { 214 // If the option does not exist in the flags table or the flag 215 // is not supported on the platform, 
216 // then it is a data value for a specific fs type 217 if f, exists := flags[o]; exists && f.flag != 0 { 218 if f.clear { 219 flag &^= f.flag 220 } else { 221 flag |= f.flag 222 } 223 } else { 224 data = append(data, o) 225 } 226 } 227 return flag, strings.Join(data, ",") 228 } 229 230 // compactLowerdirOption updates overlay lowdir option and returns the common 231 // dir among all the lowdirs. 232 func compactLowerdirOption(opts []string) (string, []string) { 233 idx, dirs := findOverlayLowerdirs(opts) 234 if idx == -1 || len(dirs) == 1 { 235 // no need to compact if there is only one lowerdir 236 return "", opts 237 } 238 239 // find out common dir 240 commondir := longestCommonPrefix(dirs) 241 if commondir == "" { 242 return "", opts 243 } 244 245 // NOTE: the snapshot id is based on digits. 246 // in order to avoid to get snapshots/x, should be back to parent dir. 247 // however, there is assumption that the common dir is ${root}/io.containerd.v1.overlayfs/snapshots. 248 commondir = path.Dir(commondir) 249 if commondir == "/" { 250 return "", opts 251 } 252 commondir = commondir + "/" 253 254 newdirs := make([]string, 0, len(dirs)) 255 for _, dir := range dirs { 256 newdirs = append(newdirs, dir[len(commondir):]) 257 } 258 259 newopts := copyOptions(opts) 260 newopts = append(newopts[:idx], newopts[idx+1:]...) 261 newopts = append(newopts, fmt.Sprintf("lowerdir=%s", strings.Join(newdirs, ":"))) 262 return commondir, newopts 263 } 264 265 // findOverlayLowerdirs returns the index of lowerdir in mount's options and 266 // all the lowerdir target. 
func findOverlayLowerdirs(opts []string) (int, []string) {
	const key = "lowerdir="

	// Only the first option carrying the key is considered; its value is
	// split on ":" into the individual directories.
	for pos, o := range opts {
		if strings.HasPrefix(o, key) {
			return pos, strings.Split(strings.TrimPrefix(o, key), ":")
		}
	}
	return -1, nil
}

// longestCommonPrefix finds the longest common prefix in the string slice.
// The comparison is byte-wise, so the result may end in the middle of a
// path element. An empty slice yields "" and a single string yields itself.
func longestCommonPrefix(strs []string) string {
	switch len(strs) {
	case 0:
		return ""
	case 1:
		return strs[0]
	}

	// The prefix shared by the lexicographically smallest and largest
	// strings is shared by everything that sorts between them.
	lo, hi := strs[0], strs[0]
	for _, s := range strs[1:] {
		if s < lo {
			lo = s
		}
		if s > hi {
			hi = s
		}
	}

	limit := len(lo)
	if len(hi) < limit {
		limit = len(hi)
	}
	for i := 0; i < limit; i++ {
		if lo[i] != hi[i] {
			return lo[:i]
		}
	}
	return lo
}

// copyOptions returns an independent copy of opts, or nil when opts is empty.
func copyOptions(opts []string) []string {
	if len(opts) == 0 {
		return nil
	}
	return append(make([]string, 0, len(opts)), opts...)
}

// optionsSize returns the byte size of options of mount.
326 func optionsSize(opts []string) int { 327 size := 0 328 for _, opt := range opts { 329 size += len(opt) 330 } 331 return size 332 } 333 334 func mountAt(chdir string, source, target, fstype string, flags uintptr, data string) error { 335 if chdir == "" { 336 return unix.Mount(source, target, fstype, flags, data) 337 } 338 339 f, err := os.Open(chdir) 340 if err != nil { 341 return errors.Wrap(err, "failed to mountat") 342 } 343 defer f.Close() 344 345 fs, err := f.Stat() 346 if err != nil { 347 return errors.Wrap(err, "failed to mountat") 348 } 349 350 if !fs.IsDir() { 351 return errors.Wrap(errors.Errorf("%s is not dir", chdir), "failed to mountat") 352 } 353 return errors.Wrap(sys.FMountat(f.Fd(), source, target, fstype, flags, data), "failed to mountat") 354 } 355 356 func (m *Mount) mountWithHelper(helperBinary, typePrefix, target string) error { 357 // helperBinary: "mount.fuse3" 358 // target: "/foo/merged" 359 // m.Type: "fuse3.fuse-overlayfs" 360 // command: "mount.fuse3 overlay /foo/merged -o lowerdir=/foo/lower2:/foo/lower1,upperdir=/foo/upper,workdir=/foo/work -t fuse-overlayfs" 361 args := []string{m.Source, target} 362 for _, o := range m.Options { 363 args = append(args, "-o", o) 364 } 365 args = append(args, "-t", strings.TrimPrefix(m.Type, typePrefix)) 366 367 infoBeforeMount, err := Lookup(target) 368 if err != nil { 369 return err 370 } 371 372 // cmd.CombinedOutput() may intermittently return ECHILD because of our signal handling in shim. 373 // See #4387 and wait(2). 374 const retriesOnECHILD = 10 375 for i := 0; i < retriesOnECHILD; i++ { 376 cmd := exec.Command(helperBinary, args...) 377 out, err := cmd.CombinedOutput() 378 if err == nil { 379 return nil 380 } 381 if !errors.Is(err, unix.ECHILD) { 382 return errors.Wrapf(err, "mount helper [%s %v] failed: %q", helperBinary, args, string(out)) 383 } 384 // We got ECHILD, we are not sure whether the mount was successful. 
385 // If the mount ID has changed, we are sure we got some new mount, but still not sure it is fully completed. 386 // So we attempt to unmount the new mount before retrying. 387 infoAfterMount, err := Lookup(target) 388 if err != nil { 389 return err 390 } 391 if infoAfterMount.ID != infoBeforeMount.ID { 392 _ = unmount(target, 0) 393 } 394 } 395 return errors.Errorf("mount helper [%s %v] failed with ECHILD (retired %d times)", helperBinary, args, retriesOnECHILD) 396 }