github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/mount.go (about) 1 // Copyright (c) 2017 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 package virtcontainers 7 8 import ( 9 "context" 10 "errors" 11 "fmt" 12 "os" 13 "path/filepath" 14 "strings" 15 "syscall" 16 17 merr "github.com/hashicorp/go-multierror" 18 "github.com/kata-containers/runtime/virtcontainers/utils" 19 "github.com/sirupsen/logrus" 20 ) 21 22 // DefaultShmSize is the default shm size to be used in case host 23 // IPC is used. 24 const DefaultShmSize = 65536 * 1024 25 26 // Sadly golang/sys doesn't have UmountNoFollow although it's there since Linux 2.6.34 27 const UmountNoFollow = 0x8 28 29 var rootfsDir = "rootfs" 30 31 var systemMountPrefixes = []string{"/proc", "/sys"} 32 33 var propagationTypes = map[string]uintptr{ 34 "shared": syscall.MS_SHARED, 35 "private": syscall.MS_PRIVATE, 36 "slave": syscall.MS_SLAVE, 37 "ubind": syscall.MS_UNBINDABLE, 38 } 39 40 func isSystemMount(m string) bool { 41 for _, p := range systemMountPrefixes { 42 if m == p || strings.HasPrefix(m, p+"/") { 43 return true 44 } 45 } 46 47 return false 48 } 49 50 func isHostDevice(m string) bool { 51 if m == "/dev" { 52 return true 53 } 54 55 if strings.HasPrefix(m, "/dev/") { 56 // Check if regular file 57 s, err := os.Stat(m) 58 59 // This should not happen. In case file does not exist let the 60 // error be handled by the agent, simply return false here. 61 if err != nil { 62 return false 63 } 64 65 if s.Mode().IsRegular() { 66 return false 67 } 68 69 // This is not a regular file in /dev. It is either a 70 // device file, directory or any other special file which is 71 // specific to the host system. 72 return true 73 } 74 75 return false 76 } 77 78 func major(dev uint64) int { 79 return int((dev >> 8) & 0xfff) 80 } 81 82 func minor(dev uint64) int { 83 return int((dev & 0xff) | ((dev >> 12) & 0xfff00)) 84 } 85 86 type device struct { 87 major int 88 minor int 89 mountPoint string 90 } 91 92 var errMountPointNotFound = errors.New("Mount point not found") 93 94 // getDeviceForPath gets the underlying device containing the file specified by path. 95 // The device type constitutes the major-minor number of the device and the dest mountPoint for the device 96 // 97 // eg. if /dev/sda1 is mounted on /a/b/c, a call to getDeviceForPath("/a/b/c/file") would return 98 // 99 // device { 100 // major : major(/dev/sda1) 101 // minor : minor(/dev/sda1) 102 // mountPoint: /a/b/c 103 // } 104 // 105 // if the path is a device path file such as /dev/sda1, it would return 106 // 107 // device { 108 // major : major(/dev/sda1) 109 // minor : minor(/dev/sda1) 110 // mountPoint: 111 112 func getDeviceForPath(path string) (device, error) { 113 var devMajor int 114 var devMinor int 115 116 if path == "" { 117 return device{}, fmt.Errorf("Path cannot be empty") 118 } 119 120 stat := syscall.Stat_t{} 121 err := syscall.Stat(path, &stat) 122 if err != nil { 123 return device{}, err 124 } 125 126 if isHostDevice(path) { 127 // stat.Rdev describes the device that this file (inode) represents. 128 devMajor = major(stat.Rdev) 129 devMinor = minor(stat.Rdev) 130 131 return device{ 132 major: devMajor, 133 minor: devMinor, 134 mountPoint: "", 135 }, nil 136 } 137 // stat.Dev points to the underlying device containing the file 138 devMajor = major(stat.Dev) 139 devMinor = minor(stat.Dev) 140 141 path, err = filepath.Abs(path) 142 if err != nil { 143 return device{}, err 144 } 145 146 mountPoint := path 147 148 if path == "/" { 149 return device{ 150 major: devMajor, 151 minor: devMinor, 152 mountPoint: mountPoint, 153 }, nil 154 } 155 156 // We get the mount point by recursively peforming stat on the path 157 // The point where the device changes indicates the mountpoint 158 for { 159 if mountPoint == "/" { 160 return device{}, errMountPointNotFound 161 } 162 163 parentStat := syscall.Stat_t{} 164 parentDir := filepath.Dir(path) 165 166 err := syscall.Lstat(parentDir, &parentStat) 167 if err != nil { 168 return device{}, err 169 } 170 171 if parentStat.Dev != stat.Dev { 172 break 173 } 174 175 mountPoint = parentDir 176 stat = parentStat 177 path = parentDir 178 } 179 180 dev := device{ 181 major: devMajor, 182 minor: devMinor, 183 mountPoint: mountPoint, 184 } 185 186 return dev, nil 187 } 188 189 var blockFormatTemplate = "/sys/dev/block/%d:%d/dm" 190 191 var checkStorageDriver = isDeviceMapper 192 193 // isDeviceMapper checks if the device with the major and minor numbers is a devicemapper block device 194 func isDeviceMapper(major, minor int) (bool, error) { 195 196 //Check if /sys/dev/block/${major}-${minor}/dm exists 197 sysPath := fmt.Sprintf(blockFormatTemplate, major, minor) 198 199 _, err := os.Stat(sysPath) 200 if err == nil { 201 return true, nil 202 } else if os.IsNotExist(err) { 203 return false, nil 204 } 205 206 return false, err 207 } 208 209 const mountPerm = os.FileMode(0755) 210 211 // bindMount bind mounts a source in to a destination. This will 212 // do some bookkeeping: 213 // * evaluate all symlinks 214 // * ensure the source exists 215 // * recursively create the destination 216 // pgtypes stands for propagation types, which are shared, private, slave, and ubind. 217 func bindMount(ctx context.Context, source, destination string, readonly bool, pgtypes string) error { 218 span, _ := trace(ctx, "bindMount") 219 defer span.Finish() 220 221 if source == "" { 222 return fmt.Errorf("source must be specified") 223 } 224 if destination == "" { 225 return fmt.Errorf("destination must be specified") 226 } 227 228 absSource, err := filepath.EvalSymlinks(source) 229 if err != nil { 230 return fmt.Errorf("Could not resolve symlink for source %v", source) 231 } 232 233 if err := ensureDestinationExists(absSource, destination); err != nil { 234 return fmt.Errorf("Could not create destination mount point %v: %v", destination, err) 235 } 236 237 if err := syscall.Mount(absSource, destination, "bind", syscall.MS_BIND, ""); err != nil { 238 return fmt.Errorf("Could not bind mount %v to %v: %v", absSource, destination, err) 239 } 240 241 if pgtype, exist := propagationTypes[pgtypes]; exist { 242 if err := syscall.Mount("none", destination, "", pgtype, ""); err != nil { 243 return fmt.Errorf("Could not make mount point %v %s: %v", destination, pgtypes, err) 244 } 245 } else { 246 return fmt.Errorf("Wrong propagation type %s", pgtypes) 247 } 248 249 // For readonly bind mounts, we need to remount with the readonly flag. 250 // This is needed as only very recent versions of libmount/util-linux support "bind,ro" 251 if readonly { 252 return syscall.Mount(absSource, destination, "bind", uintptr(syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY), "") 253 } 254 255 return nil 256 } 257 258 // An existing mount may be remounted by specifying `MS_REMOUNT` in 259 // mountflags. 260 // This allows you to change the mountflags of an existing mount. 261 // The mountflags should match the values used in the original mount() call, 262 // except for those parameters that you are trying to change. 263 func remount(ctx context.Context, mountflags uintptr, src string) error { 264 absSrc, err := filepath.EvalSymlinks(src) 265 if err != nil { 266 return fmt.Errorf("Could not resolve symlink for %s", src) 267 } 268 269 if err := syscall.Mount(absSrc, absSrc, "", syscall.MS_REMOUNT|mountflags, ""); err != nil { 270 return fmt.Errorf("remount %s failed: %v", absSrc, err) 271 } 272 273 return nil 274 } 275 276 // remount a mount point as readonly 277 func remountRo(ctx context.Context, src string) error { 278 return remount(ctx, syscall.MS_BIND|syscall.MS_RDONLY, src) 279 } 280 281 // bindMountContainerRootfs bind mounts a container rootfs into a 9pfs shared 282 // directory between the guest and the host. 283 func bindMountContainerRootfs(ctx context.Context, shareDir, cid, cRootFs string, readonly bool) error { 284 span, _ := trace(ctx, "bindMountContainerRootfs") 285 defer span.Finish() 286 287 rootfsDest := filepath.Join(shareDir, cid, rootfsDir) 288 289 return bindMount(ctx, cRootFs, rootfsDest, readonly, "private") 290 } 291 292 // Mount describes a container mount. 293 type Mount struct { 294 Source string 295 Destination string 296 297 // Type specifies the type of filesystem to mount. 298 Type string 299 300 // Options list all the mount options of the filesystem. 301 Options []string 302 303 // HostPath used to store host side bind mount path 304 HostPath string 305 306 // ReadOnly specifies if the mount should be read only or not 307 ReadOnly bool 308 309 // BlockDeviceID represents block device that is attached to the 310 // VM in case this mount is a block device file or a directory 311 // backed by a block device. 312 BlockDeviceID string 313 } 314 315 func isSymlink(path string) bool { 316 stat, err := os.Stat(path) 317 if err != nil { 318 return false 319 } 320 return stat.Mode()&os.ModeSymlink != 0 321 } 322 323 func bindUnmountContainerRootfs(ctx context.Context, sharedDir string, con *Container) error { 324 span, _ := trace(ctx, "bindUnmountContainerRootfs") 325 defer span.Finish() 326 327 if con.state.Fstype != "" && con.state.BlockDeviceID != "" { 328 return nil 329 } 330 331 rootfsDest := filepath.Join(sharedDir, con.id, rootfsDir) 332 if isSymlink(filepath.Join(sharedDir, con.id)) || isSymlink(rootfsDest) { 333 logrus.Warnf("container dir %s is a symlink, malicious guest?", con.id) 334 return nil 335 } 336 337 err := syscall.Unmount(rootfsDest, syscall.MNT_DETACH|UmountNoFollow) 338 if err == syscall.ENOENT { 339 logrus.Warnf("%s: %s", err, rootfsDest) 340 return nil 341 } 342 if err := syscall.Rmdir(rootfsDest); err != nil { 343 logrus.WithError(err).WithField("rootfs-dir", rootfsDest).Warn("Could not remove container rootfs dir") 344 } 345 346 return err 347 } 348 349 func bindUnmountAllRootfs(ctx context.Context, sharedDir string, sandbox *Sandbox) error { 350 span, _ := trace(ctx, "bindUnmountAllRootfs") 351 defer span.Finish() 352 353 var errors *merr.Error 354 for _, c := range sandbox.containers { 355 if isSymlink(filepath.Join(sharedDir, c.id)) { 356 logrus.Warnf("container dir %s is a symlink, malicious guest?", c.id) 357 continue 358 } 359 c.unmountHostMounts() 360 if c.state.Fstype == "" { 361 // even if error found, don't break out of loop until all mounts attempted 362 // to be unmounted, and collect all errors 363 errors = merr.Append(errors, bindUnmountContainerRootfs(c.ctx, sharedDir, c)) 364 } 365 } 366 return errors.ErrorOrNil() 367 } 368 369 const ( 370 dockerVolumePrefix = "/var/lib/docker/volumes" 371 dockerVolumeSuffix = "_data" 372 ) 373 374 // IsDockerVolume returns true if the given source path is 375 // a docker volume. 376 // This uses a very specific path that is used by docker. 377 func IsDockerVolume(path string) bool { 378 if strings.HasPrefix(path, dockerVolumePrefix) && filepath.Base(path) == dockerVolumeSuffix { 379 return true 380 } 381 return false 382 } 383 384 const ( 385 // K8sEmptyDir is the k8s specific path for `empty-dir` volumes 386 K8sEmptyDir = "kubernetes.io~empty-dir" 387 ) 388 389 // IsEphemeralStorage returns true if the given path 390 // to the storage belongs to kubernetes ephemeral storage 391 // 392 // This method depends on a specific path used by k8s 393 // to detect if it's of type ephemeral. As of now, 394 // this is a very k8s specific solution that works 395 // but in future there should be a better way for this 396 // method to determine if the path is for ephemeral 397 // volume type 398 func IsEphemeralStorage(path string) bool { 399 if !isEmptyDir(path) { 400 return false 401 } 402 403 if _, fsType, _, _ := utils.GetDevicePathAndFsTypeOptions(path); fsType == "tmpfs" { 404 return true 405 } 406 407 return false 408 } 409 410 // Isk8sHostEmptyDir returns true if the given path 411 // to the storage belongs to kubernetes empty-dir of medium "default" 412 // i.e volumes that are directories on the host. 413 func Isk8sHostEmptyDir(path string) bool { 414 if !isEmptyDir(path) { 415 return false 416 } 417 418 if _, fsType, _, _ := utils.GetDevicePathAndFsTypeOptions(path); fsType != "tmpfs" { 419 return true 420 } 421 return false 422 } 423 424 func isEmptyDir(path string) bool { 425 splitSourceSlice := strings.Split(path, "/") 426 if len(splitSourceSlice) > 1 { 427 storageType := splitSourceSlice[len(splitSourceSlice)-2] 428 if storageType == K8sEmptyDir { 429 return true 430 } 431 } 432 return false 433 }