github.com/akerouanton/docker@v1.11.0-rc3/daemon/graphdriver/devmapper/deviceset.go (about) 1 // +build linux 2 3 package devmapper 4 5 import ( 6 "bufio" 7 "encoding/json" 8 "errors" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "os" 13 "os/exec" 14 "path" 15 "path/filepath" 16 "strconv" 17 "strings" 18 "sync" 19 "syscall" 20 "time" 21 22 "github.com/Sirupsen/logrus" 23 24 "github.com/docker/docker/daemon/graphdriver" 25 "github.com/docker/docker/pkg/devicemapper" 26 "github.com/docker/docker/pkg/idtools" 27 "github.com/docker/docker/pkg/loopback" 28 "github.com/docker/docker/pkg/mount" 29 "github.com/docker/docker/pkg/parsers" 30 "github.com/docker/go-units" 31 32 "github.com/opencontainers/runc/libcontainer/label" 33 ) 34 35 var ( 36 defaultDataLoopbackSize int64 = 100 * 1024 * 1024 * 1024 37 defaultMetaDataLoopbackSize int64 = 2 * 1024 * 1024 * 1024 38 defaultBaseFsSize uint64 = 10 * 1024 * 1024 * 1024 39 defaultThinpBlockSize uint32 = 128 // 64K = 128 512b sectors 40 defaultUdevSyncOverride = false 41 maxDeviceID = 0xffffff // 24 bit, pool limit 42 deviceIDMapSz = (maxDeviceID + 1) / 8 43 // We retry device removal so many a times that even error messages 44 // will fill up console during normal operation. So only log Fatal 45 // messages by default. 
46 logLevel = devicemapper.LogLevelFatal 47 driverDeferredRemovalSupport = false 48 enableDeferredRemoval = false 49 enableDeferredDeletion = false 50 userBaseSize = false 51 defaultMinFreeSpacePercent uint32 = 10 52 ) 53 54 const deviceSetMetaFile string = "deviceset-metadata" 55 const transactionMetaFile string = "transaction-metadata" 56 57 type transaction struct { 58 OpenTransactionID uint64 `json:"open_transaction_id"` 59 DeviceIDHash string `json:"device_hash"` 60 DeviceID int `json:"device_id"` 61 } 62 63 type devInfo struct { 64 Hash string `json:"-"` 65 DeviceID int `json:"device_id"` 66 Size uint64 `json:"size"` 67 TransactionID uint64 `json:"transaction_id"` 68 Initialized bool `json:"initialized"` 69 Deleted bool `json:"deleted"` 70 devices *DeviceSet 71 72 // The global DeviceSet lock guarantees that we serialize all 73 // the calls to libdevmapper (which is not threadsafe), but we 74 // sometimes release that lock while sleeping. In that case 75 // this per-device lock is still held, protecting against 76 // other accesses to the device that we're doing the wait on. 77 // 78 // WARNING: In order to avoid AB-BA deadlocks when releasing 79 // the global lock while holding the per-device locks all 80 // device locks must be acquired *before* the device lock, and 81 // multiple device locks should be acquired parent before child. 
82 lock sync.Mutex 83 } 84 85 type metaData struct { 86 Devices map[string]*devInfo `json:"Devices"` 87 } 88 89 // DeviceSet holds information about list of devices 90 type DeviceSet struct { 91 metaData `json:"-"` 92 sync.Mutex `json:"-"` // Protects all fields of DeviceSet and serializes calls into libdevmapper 93 root string 94 devicePrefix string 95 TransactionID uint64 `json:"-"` 96 NextDeviceID int `json:"next_device_id"` 97 deviceIDMap []byte 98 99 // Options 100 dataLoopbackSize int64 101 metaDataLoopbackSize int64 102 baseFsSize uint64 103 filesystem string 104 mountOptions string 105 mkfsArgs []string 106 dataDevice string // block or loop dev 107 dataLoopFile string // loopback file, if used 108 metadataDevice string // block or loop dev 109 metadataLoopFile string // loopback file, if used 110 doBlkDiscard bool 111 thinpBlockSize uint32 112 thinPoolDevice string 113 transaction `json:"-"` 114 overrideUdevSyncCheck bool 115 deferredRemove bool // use deferred removal 116 deferredDelete bool // use deferred deletion 117 BaseDeviceUUID string // save UUID of base device 118 BaseDeviceFilesystem string // save filesystem of base device 119 nrDeletedDevices uint // number of deleted devices 120 deletionWorkerTicker *time.Ticker 121 uidMaps []idtools.IDMap 122 gidMaps []idtools.IDMap 123 minFreeSpacePercent uint32 //min free space percentage in thinpool 124 } 125 126 // DiskUsage contains information about disk usage and is used when reporting Status of a device. 127 type DiskUsage struct { 128 // Used bytes on the disk. 129 Used uint64 130 // Total bytes on the disk. 131 Total uint64 132 // Available bytes on the disk. 133 Available uint64 134 } 135 136 // Status returns the information about the device. 137 type Status struct { 138 // PoolName is the name of the data pool. 139 PoolName string 140 // DataFile is the actual block device for data. 141 DataFile string 142 // DataLoopback loopback file, if used. 
143 DataLoopback string 144 // MetadataFile is the actual block device for metadata. 145 MetadataFile string 146 // MetadataLoopback is the loopback file, if used. 147 MetadataLoopback string 148 // Data is the disk used for data. 149 Data DiskUsage 150 // Metadata is the disk used for meta data. 151 Metadata DiskUsage 152 // BaseDeviceSize is base size of container and image 153 BaseDeviceSize uint64 154 // BaseDeviceFS is backing filesystem. 155 BaseDeviceFS string 156 // SectorSize size of the vector. 157 SectorSize uint64 158 // UdevSyncSupported is true if sync is supported. 159 UdevSyncSupported bool 160 // DeferredRemoveEnabled is true then the device is not unmounted. 161 DeferredRemoveEnabled bool 162 // True if deferred deletion is enabled. This is different from 163 // deferred removal. "removal" means that device mapper device is 164 // deactivated. Thin device is still in thin pool and can be activated 165 // again. But "deletion" means that thin device will be deleted from 166 // thin pool and it can't be activated again. 167 DeferredDeleteEnabled bool 168 DeferredDeletedDeviceCount uint 169 } 170 171 // Structure used to export image/container metadata in docker inspect. 172 type deviceMetadata struct { 173 deviceID int 174 deviceSize uint64 // size in bytes 175 deviceName string // Device name as used during activation 176 } 177 178 // DevStatus returns information about device mounted containing its id, size and sector information. 179 type DevStatus struct { 180 // DeviceID is the id of the device. 181 DeviceID int 182 // Size is the size of the filesystem. 183 Size uint64 184 // TransactionID is a unique integer per device set used to identify an operation on the file system, this number is incremental. 185 TransactionID uint64 186 // SizeInSectors indicates the size of the sectors allocated. 187 SizeInSectors uint64 188 // MappedSectors indicates number of mapped sectors. 
189 MappedSectors uint64 190 // HighestMappedSector is the pointer to the highest mapped sector. 191 HighestMappedSector uint64 192 } 193 194 func getDevName(name string) string { 195 return "/dev/mapper/" + name 196 } 197 198 func (info *devInfo) Name() string { 199 hash := info.Hash 200 if hash == "" { 201 hash = "base" 202 } 203 return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash) 204 } 205 206 func (info *devInfo) DevName() string { 207 return getDevName(info.Name()) 208 } 209 210 func (devices *DeviceSet) loopbackDir() string { 211 return path.Join(devices.root, "devicemapper") 212 } 213 214 func (devices *DeviceSet) metadataDir() string { 215 return path.Join(devices.root, "metadata") 216 } 217 218 func (devices *DeviceSet) metadataFile(info *devInfo) string { 219 file := info.Hash 220 if file == "" { 221 file = "base" 222 } 223 return path.Join(devices.metadataDir(), file) 224 } 225 226 func (devices *DeviceSet) transactionMetaFile() string { 227 return path.Join(devices.metadataDir(), transactionMetaFile) 228 } 229 230 func (devices *DeviceSet) deviceSetMetaFile() string { 231 return path.Join(devices.metadataDir(), deviceSetMetaFile) 232 } 233 234 func (devices *DeviceSet) oldMetadataFile() string { 235 return path.Join(devices.loopbackDir(), "json") 236 } 237 238 func (devices *DeviceSet) getPoolName() string { 239 if devices.thinPoolDevice == "" { 240 return devices.devicePrefix + "-pool" 241 } 242 return devices.thinPoolDevice 243 } 244 245 func (devices *DeviceSet) getPoolDevName() string { 246 return getDevName(devices.getPoolName()) 247 } 248 249 func (devices *DeviceSet) hasImage(name string) bool { 250 dirname := devices.loopbackDir() 251 filename := path.Join(dirname, name) 252 253 _, err := os.Stat(filename) 254 return err == nil 255 } 256 257 // ensureImage creates a sparse file of <size> bytes at the path 258 // <root>/devicemapper/<name>. 
259 // If the file already exists and new size is larger than its current size, it grows to the new size. 260 // Either way it returns the full path. 261 func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) { 262 dirname := devices.loopbackDir() 263 filename := path.Join(dirname, name) 264 265 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 266 if err != nil { 267 return "", err 268 } 269 if err := idtools.MkdirAllAs(dirname, 0700, uid, gid); err != nil && !os.IsExist(err) { 270 return "", err 271 } 272 273 if fi, err := os.Stat(filename); err != nil { 274 if !os.IsNotExist(err) { 275 return "", err 276 } 277 logrus.Debugf("devmapper: Creating loopback file %s for device-manage use", filename) 278 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 279 if err != nil { 280 return "", err 281 } 282 defer file.Close() 283 284 if err := file.Truncate(size); err != nil { 285 return "", err 286 } 287 } else { 288 if fi.Size() < size { 289 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 290 if err != nil { 291 return "", err 292 } 293 defer file.Close() 294 if err := file.Truncate(size); err != nil { 295 return "", fmt.Errorf("devmapper: Unable to grow loopback file %s: %v", filename, err) 296 } 297 } else if fi.Size() > size { 298 logrus.Warnf("devmapper: Can't shrink loopback file %s", filename) 299 } 300 } 301 return filename, nil 302 } 303 304 func (devices *DeviceSet) allocateTransactionID() uint64 { 305 devices.OpenTransactionID = devices.TransactionID + 1 306 return devices.OpenTransactionID 307 } 308 309 func (devices *DeviceSet) updatePoolTransactionID() error { 310 if err := devicemapper.SetTransactionID(devices.getPoolDevName(), devices.TransactionID, devices.OpenTransactionID); err != nil { 311 return fmt.Errorf("devmapper: Error setting devmapper transaction ID: %s", err) 312 } 313 devices.TransactionID = devices.OpenTransactionID 314 return nil 315 } 316 317 func (devices 
*DeviceSet) removeMetadata(info *devInfo) error { 318 if err := os.RemoveAll(devices.metadataFile(info)); err != nil { 319 return fmt.Errorf("devmapper: Error removing metadata file %s: %s", devices.metadataFile(info), err) 320 } 321 return nil 322 } 323 324 // Given json data and file path, write it to disk 325 func (devices *DeviceSet) writeMetaFile(jsonData []byte, filePath string) error { 326 tmpFile, err := ioutil.TempFile(devices.metadataDir(), ".tmp") 327 if err != nil { 328 return fmt.Errorf("devmapper: Error creating metadata file: %s", err) 329 } 330 331 n, err := tmpFile.Write(jsonData) 332 if err != nil { 333 return fmt.Errorf("devmapper: Error writing metadata to %s: %s", tmpFile.Name(), err) 334 } 335 if n < len(jsonData) { 336 return io.ErrShortWrite 337 } 338 if err := tmpFile.Sync(); err != nil { 339 return fmt.Errorf("devmapper: Error syncing metadata file %s: %s", tmpFile.Name(), err) 340 } 341 if err := tmpFile.Close(); err != nil { 342 return fmt.Errorf("devmapper: Error closing metadata file %s: %s", tmpFile.Name(), err) 343 } 344 if err := os.Rename(tmpFile.Name(), filePath); err != nil { 345 return fmt.Errorf("devmapper: Error committing metadata file %s: %s", tmpFile.Name(), err) 346 } 347 348 return nil 349 } 350 351 func (devices *DeviceSet) saveMetadata(info *devInfo) error { 352 jsonData, err := json.Marshal(info) 353 if err != nil { 354 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 355 } 356 if err := devices.writeMetaFile(jsonData, devices.metadataFile(info)); err != nil { 357 return err 358 } 359 return nil 360 } 361 362 func (devices *DeviceSet) markDeviceIDUsed(deviceID int) { 363 var mask byte 364 i := deviceID % 8 365 mask = 1 << uint(i) 366 devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] | mask 367 } 368 369 func (devices *DeviceSet) markDeviceIDFree(deviceID int) { 370 var mask byte 371 i := deviceID % 8 372 mask = ^(1 << uint(i)) 373 devices.deviceIDMap[deviceID/8] = 
devices.deviceIDMap[deviceID/8] & mask 374 } 375 376 func (devices *DeviceSet) isDeviceIDFree(deviceID int) bool { 377 var mask byte 378 i := deviceID % 8 379 mask = (1 << uint(i)) 380 if (devices.deviceIDMap[deviceID/8] & mask) != 0 { 381 return false 382 } 383 return true 384 } 385 386 // Should be called with devices.Lock() held. 387 func (devices *DeviceSet) lookupDevice(hash string) (*devInfo, error) { 388 info := devices.Devices[hash] 389 if info == nil { 390 info = devices.loadMetadata(hash) 391 if info == nil { 392 return nil, fmt.Errorf("devmapper: Unknown device %s", hash) 393 } 394 395 devices.Devices[hash] = info 396 } 397 return info, nil 398 } 399 400 func (devices *DeviceSet) lookupDeviceWithLock(hash string) (*devInfo, error) { 401 devices.Lock() 402 defer devices.Unlock() 403 info, err := devices.lookupDevice(hash) 404 return info, err 405 } 406 407 // This function relies on that device hash map has been loaded in advance. 408 // Should be called with devices.Lock() held. 409 func (devices *DeviceSet) constructDeviceIDMap() { 410 logrus.Debugf("devmapper: constructDeviceIDMap()") 411 defer logrus.Debugf("devmapper: constructDeviceIDMap() END") 412 413 for _, info := range devices.Devices { 414 devices.markDeviceIDUsed(info.DeviceID) 415 logrus.Debugf("devmapper: Added deviceId=%d to DeviceIdMap", info.DeviceID) 416 } 417 } 418 419 func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo) error { 420 421 // Skip some of the meta files which are not device files. 
422 if strings.HasSuffix(finfo.Name(), ".migrated") { 423 logrus.Debugf("devmapper: Skipping file %s", path) 424 return nil 425 } 426 427 if strings.HasPrefix(finfo.Name(), ".") { 428 logrus.Debugf("devmapper: Skipping file %s", path) 429 return nil 430 } 431 432 if finfo.Name() == deviceSetMetaFile { 433 logrus.Debugf("devmapper: Skipping file %s", path) 434 return nil 435 } 436 437 if finfo.Name() == transactionMetaFile { 438 logrus.Debugf("devmapper: Skipping file %s", path) 439 return nil 440 } 441 442 logrus.Debugf("devmapper: Loading data for file %s", path) 443 444 hash := finfo.Name() 445 if hash == "base" { 446 hash = "" 447 } 448 449 // Include deleted devices also as cleanup delete device logic 450 // will go through it and see if there are any deleted devices. 451 if _, err := devices.lookupDevice(hash); err != nil { 452 return fmt.Errorf("devmapper: Error looking up device %s:%v", hash, err) 453 } 454 455 return nil 456 } 457 458 func (devices *DeviceSet) loadDeviceFilesOnStart() error { 459 logrus.Debugf("devmapper: loadDeviceFilesOnStart()") 460 defer logrus.Debugf("devmapper: loadDeviceFilesOnStart() END") 461 462 var scan = func(path string, info os.FileInfo, err error) error { 463 if err != nil { 464 logrus.Debugf("devmapper: Can't walk the file %s", path) 465 return nil 466 } 467 468 // Skip any directories 469 if info.IsDir() { 470 return nil 471 } 472 473 return devices.deviceFileWalkFunction(path, info) 474 } 475 476 return filepath.Walk(devices.metadataDir(), scan) 477 } 478 479 // Should be called with devices.Lock() held. 
480 func (devices *DeviceSet) unregisterDevice(id int, hash string) error { 481 logrus.Debugf("devmapper: unregisterDevice(%v, %v)", id, hash) 482 info := &devInfo{ 483 Hash: hash, 484 DeviceID: id, 485 } 486 487 delete(devices.Devices, hash) 488 489 if err := devices.removeMetadata(info); err != nil { 490 logrus.Debugf("devmapper: Error removing metadata: %s", err) 491 return err 492 } 493 494 return nil 495 } 496 497 // Should be called with devices.Lock() held. 498 func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, transactionID uint64) (*devInfo, error) { 499 logrus.Debugf("devmapper: registerDevice(%v, %v)", id, hash) 500 info := &devInfo{ 501 Hash: hash, 502 DeviceID: id, 503 Size: size, 504 TransactionID: transactionID, 505 Initialized: false, 506 devices: devices, 507 } 508 509 devices.Devices[hash] = info 510 511 if err := devices.saveMetadata(info); err != nil { 512 // Try to remove unused device 513 delete(devices.Devices, hash) 514 return nil, err 515 } 516 517 return info, nil 518 } 519 520 func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bool) error { 521 logrus.Debugf("devmapper: activateDeviceIfNeeded(%v)", info.Hash) 522 523 if info.Deleted && !ignoreDeleted { 524 return fmt.Errorf("devmapper: Can't activate device %v as it is marked for deletion", info.Hash) 525 } 526 527 // Make sure deferred removal on device is canceled, if one was 528 // scheduled. 
529 if err := devices.cancelDeferredRemoval(info); err != nil { 530 return fmt.Errorf("devmapper: Device Deferred Removal Cancellation Failed: %s", err) 531 } 532 533 if devinfo, _ := devicemapper.GetInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 { 534 return nil 535 } 536 537 return devicemapper.ActivateDevice(devices.getPoolDevName(), info.Name(), info.DeviceID, info.Size) 538 } 539 540 // Return true only if kernel supports xfs and mkfs.xfs is available 541 func xfsSupported() bool { 542 // Make sure mkfs.xfs is available 543 if _, err := exec.LookPath("mkfs.xfs"); err != nil { 544 return false 545 } 546 547 // Check if kernel supports xfs filesystem or not. 548 exec.Command("modprobe", "xfs").Run() 549 550 f, err := os.Open("/proc/filesystems") 551 if err != nil { 552 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 553 return false 554 } 555 defer f.Close() 556 557 s := bufio.NewScanner(f) 558 for s.Scan() { 559 if strings.HasSuffix(s.Text(), "\txfs") { 560 return true 561 } 562 } 563 564 if err := s.Err(); err != nil { 565 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 566 } 567 return false 568 } 569 570 func determineDefaultFS() string { 571 if xfsSupported() { 572 return "xfs" 573 } 574 575 logrus.Warn("devmapper: XFS is not supported in your system. Either the kernel doesn't support it or mkfs.xfs is not in your PATH. 
Defaulting to ext4 filesystem") 576 return "ext4" 577 } 578 579 func (devices *DeviceSet) createFilesystem(info *devInfo) (err error) { 580 devname := info.DevName() 581 582 args := []string{} 583 for _, arg := range devices.mkfsArgs { 584 args = append(args, arg) 585 } 586 587 args = append(args, devname) 588 589 if devices.filesystem == "" { 590 devices.filesystem = determineDefaultFS() 591 } 592 if err := devices.saveBaseDeviceFilesystem(devices.filesystem); err != nil { 593 return err 594 } 595 596 logrus.Infof("devmapper: Creating filesystem %s on device %s", devices.filesystem, info.Name()) 597 defer func() { 598 if err != nil { 599 logrus.Infof("devmapper: Error while creating filesystem %s on device %s: %v", devices.filesystem, info.Name(), err) 600 } else { 601 logrus.Infof("devmapper: Successfully created filesystem %s on device %s", devices.filesystem, info.Name()) 602 } 603 }() 604 605 switch devices.filesystem { 606 case "xfs": 607 err = exec.Command("mkfs.xfs", args...).Run() 608 case "ext4": 609 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0,lazy_journal_init=0"}, args...)...).Run() 610 if err != nil { 611 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0"}, args...)...).Run() 612 } 613 if err != nil { 614 return err 615 } 616 err = exec.Command("tune2fs", append([]string{"-c", "-1", "-i", "0"}, devname)...).Run() 617 default: 618 err = fmt.Errorf("devmapper: Unsupported filesystem type %s", devices.filesystem) 619 } 620 return 621 } 622 623 func (devices *DeviceSet) migrateOldMetaData() error { 624 // Migrate old metadata file 625 jsonData, err := ioutil.ReadFile(devices.oldMetadataFile()) 626 if err != nil && !os.IsNotExist(err) { 627 return err 628 } 629 630 if jsonData != nil { 631 m := metaData{Devices: make(map[string]*devInfo)} 632 633 if err := json.Unmarshal(jsonData, &m); err != nil { 634 return err 635 } 636 637 for hash, info := range m.Devices { 638 info.Hash = hash 
639 devices.saveMetadata(info) 640 } 641 if err := os.Rename(devices.oldMetadataFile(), devices.oldMetadataFile()+".migrated"); err != nil { 642 return err 643 } 644 645 } 646 647 return nil 648 } 649 650 // Cleanup deleted devices. It assumes that all the devices have been 651 // loaded in the hash table. 652 func (devices *DeviceSet) cleanupDeletedDevices() error { 653 devices.Lock() 654 655 // If there are no deleted devices, there is nothing to do. 656 if devices.nrDeletedDevices == 0 { 657 devices.Unlock() 658 return nil 659 } 660 661 var deletedDevices []*devInfo 662 663 for _, info := range devices.Devices { 664 if !info.Deleted { 665 continue 666 } 667 logrus.Debugf("devmapper: Found deleted device %s.", info.Hash) 668 deletedDevices = append(deletedDevices, info) 669 } 670 671 // Delete the deleted devices. DeleteDevice() first takes the info lock 672 // and then devices.Lock(). So drop it to avoid deadlock. 673 devices.Unlock() 674 675 for _, info := range deletedDevices { 676 // This will again try deferred deletion. 677 if err := devices.DeleteDevice(info.Hash, false); err != nil { 678 logrus.Warnf("devmapper: Deletion of device %s, device_id=%v failed:%v", info.Hash, info.DeviceID, err) 679 } 680 } 681 682 return nil 683 } 684 685 func (devices *DeviceSet) countDeletedDevices() { 686 for _, info := range devices.Devices { 687 if !info.Deleted { 688 continue 689 } 690 devices.nrDeletedDevices++ 691 } 692 } 693 694 func (devices *DeviceSet) startDeviceDeletionWorker() { 695 // Deferred deletion is not enabled. Don't do anything. 
696 if !devices.deferredDelete { 697 return 698 } 699 700 logrus.Debugf("devmapper: Worker to cleanup deleted devices started") 701 for range devices.deletionWorkerTicker.C { 702 devices.cleanupDeletedDevices() 703 } 704 } 705 706 func (devices *DeviceSet) initMetaData() error { 707 devices.Lock() 708 defer devices.Unlock() 709 710 if err := devices.migrateOldMetaData(); err != nil { 711 return err 712 } 713 714 _, transactionID, _, _, _, _, err := devices.poolStatus() 715 if err != nil { 716 return err 717 } 718 719 devices.TransactionID = transactionID 720 721 if err := devices.loadDeviceFilesOnStart(); err != nil { 722 return fmt.Errorf("devmapper: Failed to load device files:%v", err) 723 } 724 725 devices.constructDeviceIDMap() 726 devices.countDeletedDevices() 727 728 if err := devices.processPendingTransaction(); err != nil { 729 return err 730 } 731 732 // Start a goroutine to cleanup Deleted Devices 733 go devices.startDeviceDeletionWorker() 734 return nil 735 } 736 737 func (devices *DeviceSet) incNextDeviceID() { 738 // IDs are 24bit, so wrap around 739 devices.NextDeviceID = (devices.NextDeviceID + 1) & maxDeviceID 740 } 741 742 func (devices *DeviceSet) getNextFreeDeviceID() (int, error) { 743 devices.incNextDeviceID() 744 for i := 0; i <= maxDeviceID; i++ { 745 if devices.isDeviceIDFree(devices.NextDeviceID) { 746 devices.markDeviceIDUsed(devices.NextDeviceID) 747 return devices.NextDeviceID, nil 748 } 749 devices.incNextDeviceID() 750 } 751 752 return 0, fmt.Errorf("devmapper: Unable to find a free device ID") 753 } 754 755 func (devices *DeviceSet) poolHasFreeSpace() error { 756 if devices.minFreeSpacePercent == 0 { 757 return nil 758 } 759 760 _, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 761 if err != nil { 762 return err 763 } 764 765 minFreeData := (dataTotal * uint64(devices.minFreeSpacePercent)) / 100 766 if minFreeData < 1 { 767 minFreeData = 1 768 } 769 dataFree := dataTotal - dataUsed 770 if dataFree < 
minFreeData { 771 return fmt.Errorf("devmapper: Thin Pool has %v free data blocks which is less than minimum required %v free data blocks. Create more free space in thin pool or use dm.min_free_space option to change behavior", (dataTotal - dataUsed), minFreeData) 772 } 773 774 minFreeMetadata := (metadataTotal * uint64(devices.minFreeSpacePercent)) / 100 775 if minFreeMetadata < 1 { 776 minFreeMetadata = 1 777 } 778 779 metadataFree := metadataTotal - metadataUsed 780 if metadataFree < minFreeMetadata { 781 return fmt.Errorf("devmapper: Thin Pool has %v free metadata blocks which is less than minimum required %v free metadata blocks. Create more free metadata space in thin pool or use dm.min_free_space option to change behavior", (metadataTotal - metadataUsed), minFreeMetadata) 782 } 783 784 return nil 785 } 786 787 func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) { 788 devices.Lock() 789 defer devices.Unlock() 790 791 deviceID, err := devices.getNextFreeDeviceID() 792 if err != nil { 793 return nil, err 794 } 795 796 if err := devices.openTransaction(hash, deviceID); err != nil { 797 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 798 devices.markDeviceIDFree(deviceID) 799 return nil, err 800 } 801 802 for { 803 if err := devicemapper.CreateDevice(devices.getPoolDevName(), deviceID); err != nil { 804 if devicemapper.DeviceIDExists(err) { 805 // Device ID already exists. This should not 806 // happen. Now we have a mechanism to find 807 // a free device ID. So something is not right. 808 // Give a warning and continue. 
809 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 810 deviceID, err = devices.getNextFreeDeviceID() 811 if err != nil { 812 return nil, err 813 } 814 // Save new device id into transaction 815 devices.refreshTransaction(deviceID) 816 continue 817 } 818 logrus.Debugf("devmapper: Error creating device: %s", err) 819 devices.markDeviceIDFree(deviceID) 820 return nil, err 821 } 822 break 823 } 824 825 logrus.Debugf("devmapper: Registering device (id %v) with FS size %v", deviceID, devices.baseFsSize) 826 info, err := devices.registerDevice(deviceID, hash, devices.baseFsSize, devices.OpenTransactionID) 827 if err != nil { 828 _ = devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 829 devices.markDeviceIDFree(deviceID) 830 return nil, err 831 } 832 833 if err := devices.closeTransaction(); err != nil { 834 devices.unregisterDevice(deviceID, hash) 835 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 836 devices.markDeviceIDFree(deviceID) 837 return nil, err 838 } 839 return info, nil 840 } 841 842 func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo) error { 843 if err := devices.poolHasFreeSpace(); err != nil { 844 return err 845 } 846 847 deviceID, err := devices.getNextFreeDeviceID() 848 if err != nil { 849 return err 850 } 851 852 if err := devices.openTransaction(hash, deviceID); err != nil { 853 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 854 devices.markDeviceIDFree(deviceID) 855 return err 856 } 857 858 for { 859 if err := devicemapper.CreateSnapDevice(devices.getPoolDevName(), deviceID, baseInfo.Name(), baseInfo.DeviceID); err != nil { 860 if devicemapper.DeviceIDExists(err) { 861 // Device ID already exists. This should not 862 // happen. Now we have a mechanism to find 863 // a free device ID. So something is not right. 864 // Give a warning and continue. 
865 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 866 deviceID, err = devices.getNextFreeDeviceID() 867 if err != nil { 868 return err 869 } 870 // Save new device id into transaction 871 devices.refreshTransaction(deviceID) 872 continue 873 } 874 logrus.Debugf("devmapper: Error creating snap device: %s", err) 875 devices.markDeviceIDFree(deviceID) 876 return err 877 } 878 break 879 } 880 881 if _, err := devices.registerDevice(deviceID, hash, baseInfo.Size, devices.OpenTransactionID); err != nil { 882 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 883 devices.markDeviceIDFree(deviceID) 884 logrus.Debugf("devmapper: Error registering device: %s", err) 885 return err 886 } 887 888 if err := devices.closeTransaction(); err != nil { 889 devices.unregisterDevice(deviceID, hash) 890 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 891 devices.markDeviceIDFree(deviceID) 892 return err 893 } 894 return nil 895 } 896 897 func (devices *DeviceSet) loadMetadata(hash string) *devInfo { 898 info := &devInfo{Hash: hash, devices: devices} 899 900 jsonData, err := ioutil.ReadFile(devices.metadataFile(info)) 901 if err != nil { 902 return nil 903 } 904 905 if err := json.Unmarshal(jsonData, &info); err != nil { 906 return nil 907 } 908 909 if info.DeviceID > maxDeviceID { 910 logrus.Errorf("devmapper: Ignoring Invalid DeviceId=%d", info.DeviceID) 911 return nil 912 } 913 914 return info 915 } 916 917 func getDeviceUUID(device string) (string, error) { 918 out, err := exec.Command("blkid", "-s", "UUID", "-o", "value", device).Output() 919 if err != nil { 920 return "", fmt.Errorf("devmapper: Failed to find uuid for device %s:%v", device, err) 921 } 922 923 uuid := strings.TrimSuffix(string(out), "\n") 924 uuid = strings.TrimSpace(uuid) 925 logrus.Debugf("devmapper: UUID for device: %s is:%s", device, uuid) 926 return uuid, nil 927 } 928 929 func (devices *DeviceSet) getBaseDeviceSize() uint64 { 930 
info, _ := devices.lookupDevice("") 931 if info == nil { 932 return 0 933 } 934 return info.Size 935 } 936 937 func (devices *DeviceSet) getBaseDeviceFS() string { 938 return devices.BaseDeviceFilesystem 939 } 940 941 func (devices *DeviceSet) verifyBaseDeviceUUIDFS(baseInfo *devInfo) error { 942 devices.Lock() 943 defer devices.Unlock() 944 945 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 946 return err 947 } 948 defer devices.deactivateDevice(baseInfo) 949 950 uuid, err := getDeviceUUID(baseInfo.DevName()) 951 if err != nil { 952 return err 953 } 954 955 if devices.BaseDeviceUUID != uuid { 956 return fmt.Errorf("devmapper: Current Base Device UUID:%s does not match with stored UUID:%s. Possibly using a different thin pool than last invocation", uuid, devices.BaseDeviceUUID) 957 } 958 959 if devices.BaseDeviceFilesystem == "" { 960 fsType, err := ProbeFsType(baseInfo.DevName()) 961 if err != nil { 962 return err 963 } 964 if err := devices.saveBaseDeviceFilesystem(fsType); err != nil { 965 return err 966 } 967 } 968 969 // If user specified a filesystem using dm.fs option and current 970 // file system of base image is not same, warn user that dm.fs 971 // will be ignored. 972 if devices.BaseDeviceFilesystem != devices.filesystem { 973 logrus.Warnf("devmapper: Base device already exists and has filesystem %s on it. 
User specified filesystem %s will be ignored.", devices.BaseDeviceFilesystem, devices.filesystem) 974 devices.filesystem = devices.BaseDeviceFilesystem 975 } 976 return nil 977 } 978 979 func (devices *DeviceSet) saveBaseDeviceFilesystem(fs string) error { 980 devices.BaseDeviceFilesystem = fs 981 return devices.saveDeviceSetMetaData() 982 } 983 984 func (devices *DeviceSet) saveBaseDeviceUUID(baseInfo *devInfo) error { 985 devices.Lock() 986 defer devices.Unlock() 987 988 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 989 return err 990 } 991 defer devices.deactivateDevice(baseInfo) 992 993 uuid, err := getDeviceUUID(baseInfo.DevName()) 994 if err != nil { 995 return err 996 } 997 998 devices.BaseDeviceUUID = uuid 999 return devices.saveDeviceSetMetaData() 1000 } 1001 1002 func (devices *DeviceSet) createBaseImage() error { 1003 logrus.Debugf("devmapper: Initializing base device-mapper thin volume") 1004 1005 // Create initial device 1006 info, err := devices.createRegisterDevice("") 1007 if err != nil { 1008 return err 1009 } 1010 1011 logrus.Debugf("devmapper: Creating filesystem on base device-mapper thin volume") 1012 1013 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 1014 return err 1015 } 1016 1017 if err := devices.createFilesystem(info); err != nil { 1018 return err 1019 } 1020 1021 info.Initialized = true 1022 if err := devices.saveMetadata(info); err != nil { 1023 info.Initialized = false 1024 return err 1025 } 1026 1027 if err := devices.saveBaseDeviceUUID(info); err != nil { 1028 return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err) 1029 } 1030 1031 return nil 1032 } 1033 1034 // Returns if thin pool device exists or not. If device exists, also makes 1035 // sure it is a thin pool device and not some other type of device. 
func (devices *DeviceSet) thinPoolExists(thinPoolDevice string) (bool, error) {
	logrus.Debugf("devmapper: Checking for existence of the pool %s", thinPoolDevice)

	info, err := devicemapper.GetInfo(thinPoolDevice)
	if err != nil {
		return false, fmt.Errorf("devmapper: GetInfo() on device %s failed: %v", thinPoolDevice, err)
	}

	// Device does not exist. This is not an error: report (false, nil).
	if info.Exists == 0 {
		return false, nil
	}

	// The device exists; its reported target type must be "thin-pool".
	_, _, deviceType, _, err := devicemapper.GetStatus(thinPoolDevice)
	if err != nil {
		return false, fmt.Errorf("devmapper: GetStatus() on device %s failed: %v", thinPoolDevice, err)
	}

	if deviceType != "thin-pool" {
		return false, fmt.Errorf("devmapper: Device %s is not a thin pool", thinPoolDevice)
	}

	return true, nil
}

// checkThinPool verifies that the pool reported by poolStatus() is pristine:
// it returns an error if the pool status cannot be read, if the pool already
// has used data blocks, or if its transaction ID is non-zero.
func (devices *DeviceSet) checkThinPool() error {
	_, transactionID, dataUsed, _, _, _, err := devices.poolStatus()
	if err != nil {
		return err
	}
	if dataUsed != 0 {
		return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) that already has used data blocks",
			devices.thinPoolDevice)
	}
	if transactionID != 0 {
		return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) with non-zero transaction ID",
			devices.thinPoolDevice)
	}
	return nil
}

// Base image is initialized properly. Either save UUID for first time (for
// upgrade case) or verify UUID.
//
// When no UUID has been stored yet, the UUID is saved and the function
// returns without running the verification step.
func (devices *DeviceSet) setupVerifyBaseImageUUIDFS(baseInfo *devInfo) error {
	// If BaseDeviceUUID is empty (upgrade case), save it and return success.
	if devices.BaseDeviceUUID == "" {
		if err := devices.saveBaseDeviceUUID(baseInfo); err != nil {
			return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err)
		}
		return nil
	}

	if err := devices.verifyBaseDeviceUUIDFS(baseInfo); err != nil {
		return fmt.Errorf("devmapper: Base Device UUID and Filesystem verification failed.%v", err)
	}

	return nil
}

// checkGrowBaseDeviceFS grows the base device filesystem when the package
// level userBaseSize flag is set and devices.baseFsSize is larger than the
// current base device size. It is a no-op when the flag is unset or the sizes
// are equal, and an error when the requested size is smaller than the
// current one.
func (devices *DeviceSet) checkGrowBaseDeviceFS(info *devInfo) error {

	if !userBaseSize {
		return nil
	}

	if devices.baseFsSize < devices.getBaseDeviceSize() {
		return fmt.Errorf("devmapper: Base device size cannot be smaller than %s", units.HumanSize(float64(devices.getBaseDeviceSize())))
	}

	if devices.baseFsSize == devices.getBaseDeviceSize() {
		return nil
	}

	// Per-device lock is taken before the DeviceSet lock.
	info.lock.Lock()
	defer info.lock.Unlock()

	devices.Lock()
	defer devices.Unlock()

	info.Size = devices.baseFsSize

	if err := devices.saveMetadata(info); err != nil {
		// Try to remove unused device
		// NOTE(review): this drops the base device entry from the in-memory
		// map and leaves info.Size at the new value even though the metadata
		// write failed - confirm this is intended.
		delete(devices.Devices, info.Hash)
		return err
	}

	return devices.growFS(info)
}

// growFS activates the device, mounts it on /run/docker/mnt and runs the
// filesystem-specific resize tool (resize2fs for ext4, xfs_growfs for xfs).
// Any other value of devices.BaseDeviceFilesystem yields an error.
//
// Deferred cleanups run in reverse order of registration: unmount first,
// then removal of the mount point (only if this call created it), then
// deactivation of the device.
func (devices *DeviceSet) growFS(info *devInfo) error {
	if err := devices.activateDeviceIfNeeded(info, false); err != nil {
		return fmt.Errorf("Error activating devmapper device: %s", err)
	}

	defer devices.deactivateDevice(info)

	fsMountPoint := "/run/docker/mnt"
	if _, err := os.Stat(fsMountPoint); os.IsNotExist(err) {
		if err := os.MkdirAll(fsMountPoint, 0700); err != nil {
			return err
		}
		// Only remove the directory when it was created here.
		defer os.RemoveAll(fsMountPoint)
	}

	options := ""
	if devices.BaseDeviceFilesystem == "xfs" {
		// XFS needs nouuid or it can't mount filesystems with the same fs
		options = joinMountOptions(options, "nouuid")
	}
	options = joinMountOptions(options, devices.mountOptions)

	if err := mount.Mount(info.DevName(), fsMountPoint, devices.BaseDeviceFilesystem, options); err != nil {
		return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), fsMountPoint, err)
	}

	defer syscall.Unmount(fsMountPoint, syscall.MNT_DETACH)

	switch devices.BaseDeviceFilesystem {
	case "ext4":
		if out, err := exec.Command("resize2fs", info.DevName()).CombinedOutput(); err != nil {
			return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out))
		}
	case "xfs":
		if out, err := exec.Command("xfs_growfs", info.DevName()).CombinedOutput(); err != nil {
			return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out))
		}
	default:
		return fmt.Errorf("Unsupported filesystem type %s", devices.BaseDeviceFilesystem)
	}
	return nil
}

// setupBaseImage makes sure a usable base device (hash "") exists.
//
// An existing device that is initialized and not marked deleted is verified
// and, if requested, grown. Any other existing device is deleted
// synchronously and recreated. When no device existed and an external thin
// pool is configured, the pool must pass checkThinPool() before the base
// image is created.
func (devices *DeviceSet) setupBaseImage() error {
	// The lookup error is ignored; a nil oldInfo means "no base device".
	oldInfo, _ := devices.lookupDeviceWithLock("")

	// base image already exists. If it is initialized properly, do UUID
	// verification and return. Otherwise remove image and set it up
	// fresh.

	if oldInfo != nil {
		if oldInfo.Initialized && !oldInfo.Deleted {
			if err := devices.setupVerifyBaseImageUUIDFS(oldInfo); err != nil {
				return err
			}

			if err := devices.checkGrowBaseDeviceFS(oldInfo); err != nil {
				return err
			}

			return nil
		}

		logrus.Debugf("devmapper: Removing uninitialized base image")
		// If previous base device is in deferred delete state,
		// that needs to be cleaned up first. So don't try
		// deferred deletion.
		if err := devices.DeleteDevice("", true); err != nil {
			return err
		}
	}

	// If we are setting up base image for the first time, make sure
	// thin pool is empty.
	if devices.thinPoolDevice != "" && oldInfo == nil {
		if err := devices.checkThinPool(); err != nil {
			return err
		}
	}

	// Create new base image device
	if err := devices.createBaseImage(); err != nil {
		return err
	}

	return nil
}

// setCloseOnExec marks every file descriptor of this process whose
// /proc/self/fd symlink resolves to name as close-on-exec. Errors while
// listing, reading links or parsing descriptor numbers are ignored.
func setCloseOnExec(name string) {
	if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil {
		for _, i := range fileInfos {
			link, _ := os.Readlink(filepath.Join("/proc/self/fd", i.Name()))
			if link == name {
				fd, err := strconv.Atoi(i.Name())
				if err == nil {
					syscall.CloseOnExec(fd)
				}
			}
		}
	}
}

// DMLog implements logging using DevMapperLogger interface.
//
// Messages with a level above the package-level logLevel are dropped; the
// rest are forwarded to logrus at Error, Info or Debug severity depending on
// the libdm level.
func (devices *DeviceSet) DMLog(level int, file string, line int, dmError int, message string) {
	// By default libdm sends us all the messages including debug ones.
	// We need to filter out messages here and figure out which one
	// should be printed.
	if level > logLevel {
		return
	}

	// FIXME(vbatts) push this back into ./pkg/devicemapper/
	if level <= devicemapper.LogLevelErr {
		logrus.Errorf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message)
	} else if level <= devicemapper.LogLevelInfo {
		logrus.Infof("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message)
	} else {
		// FIXME(vbatts) push this back into ./pkg/devicemapper/
		logrus.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message)
	}
}

// major returns bits 8-19 of device.
// NOTE(review): presumably the major number of a Linux dev_t - confirm.
func major(device uint64) uint64 {
	return (device >> 8) & 0xfff
}

// minor returns the low 8 bits of device combined with bits 20-31 of device
// moved down to bits 8-19.
// NOTE(review): presumably the minor number of a Linux dev_t - confirm.
func minor(device uint64) uint64 {
	return (device & 0xff) | ((device >> 12) & 0xfff00)
}

// ResizePool increases the size of the pool.
1257 func (devices *DeviceSet) ResizePool(size int64) error { 1258 dirname := devices.loopbackDir() 1259 datafilename := path.Join(dirname, "data") 1260 if len(devices.dataDevice) > 0 { 1261 datafilename = devices.dataDevice 1262 } 1263 metadatafilename := path.Join(dirname, "metadata") 1264 if len(devices.metadataDevice) > 0 { 1265 metadatafilename = devices.metadataDevice 1266 } 1267 1268 datafile, err := os.OpenFile(datafilename, os.O_RDWR, 0) 1269 if datafile == nil { 1270 return err 1271 } 1272 defer datafile.Close() 1273 1274 fi, err := datafile.Stat() 1275 if fi == nil { 1276 return err 1277 } 1278 1279 if fi.Size() > size { 1280 return fmt.Errorf("devmapper: Can't shrink file") 1281 } 1282 1283 dataloopback := loopback.FindLoopDeviceFor(datafile) 1284 if dataloopback == nil { 1285 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", datafilename) 1286 } 1287 defer dataloopback.Close() 1288 1289 metadatafile, err := os.OpenFile(metadatafilename, os.O_RDWR, 0) 1290 if metadatafile == nil { 1291 return err 1292 } 1293 defer metadatafile.Close() 1294 1295 metadataloopback := loopback.FindLoopDeviceFor(metadatafile) 1296 if metadataloopback == nil { 1297 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", metadatafilename) 1298 } 1299 defer metadataloopback.Close() 1300 1301 // Grow loopback file 1302 if err := datafile.Truncate(size); err != nil { 1303 return fmt.Errorf("devmapper: Unable to grow loopback file: %s", err) 1304 } 1305 1306 // Reload size for loopback device 1307 if err := loopback.SetCapacity(dataloopback); err != nil { 1308 return fmt.Errorf("Unable to update loopback capacity: %s", err) 1309 } 1310 1311 // Suspend the pool 1312 if err := devicemapper.SuspendDevice(devices.getPoolName()); err != nil { 1313 return fmt.Errorf("devmapper: Unable to suspend pool: %s", err) 1314 } 1315 1316 // Reload with the new block sizes 1317 if err := devicemapper.ReloadPool(devices.getPoolName(), dataloopback, 
metadataloopback, devices.thinpBlockSize); err != nil { 1318 return fmt.Errorf("devmapper: Unable to reload pool: %s", err) 1319 } 1320 1321 // Resume the pool 1322 if err := devicemapper.ResumeDevice(devices.getPoolName()); err != nil { 1323 return fmt.Errorf("devmapper: Unable to resume pool: %s", err) 1324 } 1325 1326 return nil 1327 } 1328 1329 func (devices *DeviceSet) loadTransactionMetaData() error { 1330 jsonData, err := ioutil.ReadFile(devices.transactionMetaFile()) 1331 if err != nil { 1332 // There is no active transaction. This will be the case 1333 // during upgrade. 1334 if os.IsNotExist(err) { 1335 devices.OpenTransactionID = devices.TransactionID 1336 return nil 1337 } 1338 return err 1339 } 1340 1341 json.Unmarshal(jsonData, &devices.transaction) 1342 return nil 1343 } 1344 1345 func (devices *DeviceSet) saveTransactionMetaData() error { 1346 jsonData, err := json.Marshal(&devices.transaction) 1347 if err != nil { 1348 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1349 } 1350 1351 return devices.writeMetaFile(jsonData, devices.transactionMetaFile()) 1352 } 1353 1354 func (devices *DeviceSet) removeTransactionMetaData() error { 1355 if err := os.RemoveAll(devices.transactionMetaFile()); err != nil { 1356 return err 1357 } 1358 return nil 1359 } 1360 1361 func (devices *DeviceSet) rollbackTransaction() error { 1362 logrus.Debugf("devmapper: Rolling back open transaction: TransactionID=%d hash=%s device_id=%d", devices.OpenTransactionID, devices.DeviceIDHash, devices.DeviceID) 1363 1364 // A device id might have already been deleted before transaction 1365 // closed. In that case this call will fail. Just leave a message 1366 // in case of failure. 
1367 if err := devicemapper.DeleteDevice(devices.getPoolDevName(), devices.DeviceID); err != nil { 1368 logrus.Errorf("devmapper: Unable to delete device: %s", err) 1369 } 1370 1371 dinfo := &devInfo{Hash: devices.DeviceIDHash} 1372 if err := devices.removeMetadata(dinfo); err != nil { 1373 logrus.Errorf("devmapper: Unable to remove metadata: %s", err) 1374 } else { 1375 devices.markDeviceIDFree(devices.DeviceID) 1376 } 1377 1378 if err := devices.removeTransactionMetaData(); err != nil { 1379 logrus.Errorf("devmapper: Unable to remove transaction meta file %s: %s", devices.transactionMetaFile(), err) 1380 } 1381 1382 return nil 1383 } 1384 1385 func (devices *DeviceSet) processPendingTransaction() error { 1386 if err := devices.loadTransactionMetaData(); err != nil { 1387 return err 1388 } 1389 1390 // If there was open transaction but pool transaction ID is same 1391 // as open transaction ID, nothing to roll back. 1392 if devices.TransactionID == devices.OpenTransactionID { 1393 return nil 1394 } 1395 1396 // If open transaction ID is less than pool transaction ID, something 1397 // is wrong. Bail out. 1398 if devices.OpenTransactionID < devices.TransactionID { 1399 logrus.Errorf("devmapper: Open Transaction id %d is less than pool transaction id %d", devices.OpenTransactionID, devices.TransactionID) 1400 return nil 1401 } 1402 1403 // Pool transaction ID is not same as open transaction. There is 1404 // a transaction which was not completed. 1405 if err := devices.rollbackTransaction(); err != nil { 1406 return fmt.Errorf("devmapper: Rolling back open transaction failed: %s", err) 1407 } 1408 1409 devices.OpenTransactionID = devices.TransactionID 1410 return nil 1411 } 1412 1413 func (devices *DeviceSet) loadDeviceSetMetaData() error { 1414 jsonData, err := ioutil.ReadFile(devices.deviceSetMetaFile()) 1415 if err != nil { 1416 // For backward compatibility return success if file does 1417 // not exist. 
1418 if os.IsNotExist(err) { 1419 return nil 1420 } 1421 return err 1422 } 1423 1424 return json.Unmarshal(jsonData, devices) 1425 } 1426 1427 func (devices *DeviceSet) saveDeviceSetMetaData() error { 1428 jsonData, err := json.Marshal(devices) 1429 if err != nil { 1430 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1431 } 1432 1433 return devices.writeMetaFile(jsonData, devices.deviceSetMetaFile()) 1434 } 1435 1436 func (devices *DeviceSet) openTransaction(hash string, DeviceID int) error { 1437 devices.allocateTransactionID() 1438 devices.DeviceIDHash = hash 1439 devices.DeviceID = DeviceID 1440 if err := devices.saveTransactionMetaData(); err != nil { 1441 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1442 } 1443 return nil 1444 } 1445 1446 func (devices *DeviceSet) refreshTransaction(DeviceID int) error { 1447 devices.DeviceID = DeviceID 1448 if err := devices.saveTransactionMetaData(); err != nil { 1449 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1450 } 1451 return nil 1452 } 1453 1454 func (devices *DeviceSet) closeTransaction() error { 1455 if err := devices.updatePoolTransactionID(); err != nil { 1456 logrus.Debugf("devmapper: Failed to close Transaction") 1457 return err 1458 } 1459 return nil 1460 } 1461 1462 func determineDriverCapabilities(version string) error { 1463 /* 1464 * Driver version 4.27.0 and greater support deferred activation 1465 * feature. 
1466 */ 1467 1468 logrus.Debugf("devicemapper: driver version is %s", version) 1469 1470 versionSplit := strings.Split(version, ".") 1471 major, err := strconv.Atoi(versionSplit[0]) 1472 if err != nil { 1473 return graphdriver.ErrNotSupported 1474 } 1475 1476 if major > 4 { 1477 driverDeferredRemovalSupport = true 1478 return nil 1479 } 1480 1481 if major < 4 { 1482 return nil 1483 } 1484 1485 minor, err := strconv.Atoi(versionSplit[1]) 1486 if err != nil { 1487 return graphdriver.ErrNotSupported 1488 } 1489 1490 /* 1491 * If major is 4 and minor is 27, then there is no need to 1492 * check for patch level as it can not be less than 0. 1493 */ 1494 if minor >= 27 { 1495 driverDeferredRemovalSupport = true 1496 return nil 1497 } 1498 1499 return nil 1500 } 1501 1502 // Determine the major and minor number of loopback device 1503 func getDeviceMajorMinor(file *os.File) (uint64, uint64, error) { 1504 stat, err := file.Stat() 1505 if err != nil { 1506 return 0, 0, err 1507 } 1508 1509 dev := stat.Sys().(*syscall.Stat_t).Rdev 1510 majorNum := major(dev) 1511 minorNum := minor(dev) 1512 1513 logrus.Debugf("devmapper: Major:Minor for device: %s is:%v:%v", file.Name(), majorNum, minorNum) 1514 return majorNum, minorNum, nil 1515 } 1516 1517 // Given a file which is backing file of a loop back device, find the 1518 // loopback device name and its major/minor number. 
1519 func getLoopFileDeviceMajMin(filename string) (string, uint64, uint64, error) { 1520 file, err := os.Open(filename) 1521 if err != nil { 1522 logrus.Debugf("devmapper: Failed to open file %s", filename) 1523 return "", 0, 0, err 1524 } 1525 1526 defer file.Close() 1527 loopbackDevice := loopback.FindLoopDeviceFor(file) 1528 if loopbackDevice == nil { 1529 return "", 0, 0, fmt.Errorf("devmapper: Unable to find loopback mount for: %s", filename) 1530 } 1531 defer loopbackDevice.Close() 1532 1533 Major, Minor, err := getDeviceMajorMinor(loopbackDevice) 1534 if err != nil { 1535 return "", 0, 0, err 1536 } 1537 return loopbackDevice.Name(), Major, Minor, nil 1538 } 1539 1540 // Get the major/minor numbers of thin pool data and metadata devices 1541 func (devices *DeviceSet) getThinPoolDataMetaMajMin() (uint64, uint64, uint64, uint64, error) { 1542 var params, poolDataMajMin, poolMetadataMajMin string 1543 1544 _, _, _, params, err := devicemapper.GetTable(devices.getPoolName()) 1545 if err != nil { 1546 return 0, 0, 0, 0, err 1547 } 1548 1549 if _, err = fmt.Sscanf(params, "%s %s", &poolMetadataMajMin, &poolDataMajMin); err != nil { 1550 return 0, 0, 0, 0, err 1551 } 1552 1553 logrus.Debugf("devmapper: poolDataMajMin=%s poolMetaMajMin=%s\n", poolDataMajMin, poolMetadataMajMin) 1554 1555 poolDataMajMinorSplit := strings.Split(poolDataMajMin, ":") 1556 poolDataMajor, err := strconv.ParseUint(poolDataMajMinorSplit[0], 10, 32) 1557 if err != nil { 1558 return 0, 0, 0, 0, err 1559 } 1560 1561 poolDataMinor, err := strconv.ParseUint(poolDataMajMinorSplit[1], 10, 32) 1562 if err != nil { 1563 return 0, 0, 0, 0, err 1564 } 1565 1566 poolMetadataMajMinorSplit := strings.Split(poolMetadataMajMin, ":") 1567 poolMetadataMajor, err := strconv.ParseUint(poolMetadataMajMinorSplit[0], 10, 32) 1568 if err != nil { 1569 return 0, 0, 0, 0, err 1570 } 1571 1572 poolMetadataMinor, err := strconv.ParseUint(poolMetadataMajMinorSplit[1], 10, 32) 1573 if err != nil { 1574 return 0, 0, 0, 
0, err 1575 } 1576 1577 return poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, nil 1578 } 1579 1580 func (devices *DeviceSet) loadThinPoolLoopBackInfo() error { 1581 poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, err := devices.getThinPoolDataMetaMajMin() 1582 if err != nil { 1583 return err 1584 } 1585 1586 dirname := devices.loopbackDir() 1587 1588 // data device has not been passed in. So there should be a data file 1589 // which is being mounted as loop device. 1590 if devices.dataDevice == "" { 1591 datafilename := path.Join(dirname, "data") 1592 dataLoopDevice, dataMajor, dataMinor, err := getLoopFileDeviceMajMin(datafilename) 1593 if err != nil { 1594 return err 1595 } 1596 1597 // Compare the two 1598 if poolDataMajor == dataMajor && poolDataMinor == dataMinor { 1599 devices.dataDevice = dataLoopDevice 1600 devices.dataLoopFile = datafilename 1601 } 1602 1603 } 1604 1605 // metadata device has not been passed in. So there should be a 1606 // metadata file which is being mounted as loop device. 
1607 if devices.metadataDevice == "" { 1608 metadatafilename := path.Join(dirname, "metadata") 1609 metadataLoopDevice, metadataMajor, metadataMinor, err := getLoopFileDeviceMajMin(metadatafilename) 1610 if err != nil { 1611 return err 1612 } 1613 if poolMetadataMajor == metadataMajor && poolMetadataMinor == metadataMinor { 1614 devices.metadataDevice = metadataLoopDevice 1615 devices.metadataLoopFile = metadatafilename 1616 } 1617 } 1618 1619 return nil 1620 } 1621 1622 func (devices *DeviceSet) initDevmapper(doInit bool) error { 1623 // give ourselves to libdm as a log handler 1624 devicemapper.LogInit(devices) 1625 1626 version, err := devicemapper.GetDriverVersion() 1627 if err != nil { 1628 // Can't even get driver version, assume not supported 1629 return graphdriver.ErrNotSupported 1630 } 1631 1632 if err := determineDriverCapabilities(version); err != nil { 1633 return graphdriver.ErrNotSupported 1634 } 1635 1636 // If user asked for deferred removal then check both libdm library 1637 // and kernel driver support deferred removal otherwise error out. 1638 if enableDeferredRemoval { 1639 if !driverDeferredRemovalSupport { 1640 return fmt.Errorf("devmapper: Deferred removal can not be enabled as kernel does not support it") 1641 } 1642 if !devicemapper.LibraryDeferredRemovalSupport { 1643 return fmt.Errorf("devmapper: Deferred removal can not be enabled as libdm does not support it") 1644 } 1645 logrus.Debugf("devmapper: Deferred removal support enabled.") 1646 devices.deferredRemove = true 1647 } 1648 1649 if enableDeferredDeletion { 1650 if !devices.deferredRemove { 1651 return fmt.Errorf("devmapper: Deferred deletion can not be enabled as deferred removal is not enabled. 
Enable deferred removal using --storage-opt dm.use_deferred_removal=true parameter") 1652 } 1653 logrus.Debugf("devmapper: Deferred deletion support enabled.") 1654 devices.deferredDelete = true 1655 } 1656 1657 // https://github.com/docker/docker/issues/4036 1658 if supported := devicemapper.UdevSetSyncSupport(true); !supported { 1659 logrus.Errorf("devmapper: Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a dynamic binary to use devicemapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/daemon/#daemon-storage-driver-option") 1660 if !devices.overrideUdevSyncCheck { 1661 return graphdriver.ErrNotSupported 1662 } 1663 } 1664 1665 //create the root dir of the devmapper driver ownership to match this 1666 //daemon's remapped root uid/gid so containers can start properly 1667 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 1668 if err != nil { 1669 return err 1670 } 1671 if err := idtools.MkdirAs(devices.root, 0700, uid, gid); err != nil && !os.IsExist(err) { 1672 return err 1673 } 1674 if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil && !os.IsExist(err) { 1675 return err 1676 } 1677 1678 // Set the device prefix from the device id and inode of the docker root dir 1679 1680 st, err := os.Stat(devices.root) 1681 if err != nil { 1682 return fmt.Errorf("devmapper: Error looking up dir %s: %s", devices.root, err) 1683 } 1684 sysSt := st.Sys().(*syscall.Stat_t) 1685 // "reg-" stands for "regular file". 1686 // In the future we might use "dev-" for "device file", etc. 1687 // docker-maj,min[-inode] stands for: 1688 // - Managed by docker 1689 // - The target of this device is at major <maj> and minor <min> 1690 // - If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself. 
1691 devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino) 1692 logrus.Debugf("devmapper: Generated prefix: %s", devices.devicePrefix) 1693 1694 // Check for the existence of the thin-pool device 1695 poolExists, err := devices.thinPoolExists(devices.getPoolName()) 1696 if err != nil { 1697 return err 1698 } 1699 1700 // It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files 1701 // that are not Close-on-exec, 1702 // so we add this badhack to make sure it closes itself 1703 setCloseOnExec("/dev/mapper/control") 1704 1705 // Make sure the sparse images exist in <root>/devicemapper/data and 1706 // <root>/devicemapper/metadata 1707 1708 createdLoopback := false 1709 1710 // If the pool doesn't exist, create it 1711 if !poolExists && devices.thinPoolDevice == "" { 1712 logrus.Debugf("devmapper: Pool doesn't exist. Creating it.") 1713 1714 var ( 1715 dataFile *os.File 1716 metadataFile *os.File 1717 ) 1718 1719 if devices.dataDevice == "" { 1720 // Make sure the sparse images exist in <root>/devicemapper/data 1721 1722 hasData := devices.hasImage("data") 1723 1724 if !doInit && !hasData { 1725 return errors.New("Loopback data file not found") 1726 } 1727 1728 if !hasData { 1729 createdLoopback = true 1730 } 1731 1732 data, err := devices.ensureImage("data", devices.dataLoopbackSize) 1733 if err != nil { 1734 logrus.Debugf("devmapper: Error device ensureImage (data): %s", err) 1735 return err 1736 } 1737 1738 dataFile, err = loopback.AttachLoopDevice(data) 1739 if err != nil { 1740 return err 1741 } 1742 devices.dataLoopFile = data 1743 devices.dataDevice = dataFile.Name() 1744 } else { 1745 dataFile, err = os.OpenFile(devices.dataDevice, os.O_RDWR, 0600) 1746 if err != nil { 1747 return err 1748 } 1749 } 1750 defer dataFile.Close() 1751 1752 if devices.metadataDevice == "" { 1753 // Make sure the sparse images exist in <root>/devicemapper/metadata 1754 1755 hasMetadata := 
devices.hasImage("metadata") 1756 1757 if !doInit && !hasMetadata { 1758 return errors.New("Loopback metadata file not found") 1759 } 1760 1761 if !hasMetadata { 1762 createdLoopback = true 1763 } 1764 1765 metadata, err := devices.ensureImage("metadata", devices.metaDataLoopbackSize) 1766 if err != nil { 1767 logrus.Debugf("devmapper: Error device ensureImage (metadata): %s", err) 1768 return err 1769 } 1770 1771 metadataFile, err = loopback.AttachLoopDevice(metadata) 1772 if err != nil { 1773 return err 1774 } 1775 devices.metadataLoopFile = metadata 1776 devices.metadataDevice = metadataFile.Name() 1777 } else { 1778 metadataFile, err = os.OpenFile(devices.metadataDevice, os.O_RDWR, 0600) 1779 if err != nil { 1780 return err 1781 } 1782 } 1783 defer metadataFile.Close() 1784 1785 if err := devicemapper.CreatePool(devices.getPoolName(), dataFile, metadataFile, devices.thinpBlockSize); err != nil { 1786 return err 1787 } 1788 } 1789 1790 // Pool already exists and caller did not pass us a pool. That means 1791 // we probably created pool earlier and could not remove it as some 1792 // containers were still using it. Detect some of the properties of 1793 // pool, like is it using loop devices. 1794 if poolExists && devices.thinPoolDevice == "" { 1795 if err := devices.loadThinPoolLoopBackInfo(); err != nil { 1796 logrus.Debugf("devmapper: Failed to load thin pool loopback device information:%v", err) 1797 return err 1798 } 1799 } 1800 1801 // If we didn't just create the data or metadata image, we need to 1802 // load the transaction id and migrate old metadata 1803 if !createdLoopback { 1804 if err := devices.initMetaData(); err != nil { 1805 return err 1806 } 1807 } 1808 1809 if devices.thinPoolDevice == "" { 1810 if devices.metadataLoopFile != "" || devices.dataLoopFile != "" { 1811 logrus.Warnf("devmapper: Usage of loopback devices is strongly discouraged for production use. 
Please use `--storage-opt dm.thinpooldev` or use `man docker` to refer to dm.thinpooldev section.") 1812 } 1813 } 1814 1815 // Right now this loads only NextDeviceID. If there is more metadata 1816 // down the line, we might have to move it earlier. 1817 if err := devices.loadDeviceSetMetaData(); err != nil { 1818 return err 1819 } 1820 1821 // Setup the base image 1822 if doInit { 1823 if err := devices.setupBaseImage(); err != nil { 1824 logrus.Debugf("devmapper: Error device setupBaseImage: %s", err) 1825 return err 1826 } 1827 } 1828 1829 return nil 1830 } 1831 1832 // AddDevice adds a device and registers in the hash. 1833 func (devices *DeviceSet) AddDevice(hash, baseHash string) error { 1834 logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s)", hash, baseHash) 1835 defer logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s) END", hash, baseHash) 1836 1837 // If a deleted device exists, return error. 1838 baseInfo, err := devices.lookupDeviceWithLock(baseHash) 1839 if err != nil { 1840 return err 1841 } 1842 1843 if baseInfo.Deleted { 1844 return fmt.Errorf("devmapper: Base device %v has been marked for deferred deletion", baseInfo.Hash) 1845 } 1846 1847 baseInfo.lock.Lock() 1848 defer baseInfo.lock.Unlock() 1849 1850 devices.Lock() 1851 defer devices.Unlock() 1852 1853 // Also include deleted devices in case hash of new device is 1854 // same as one of the deleted devices. 1855 if info, _ := devices.lookupDevice(hash); info != nil { 1856 return fmt.Errorf("devmapper: device %s already exists. Deleted=%v", hash, info.Deleted) 1857 } 1858 1859 if err := devices.createRegisterSnapDevice(hash, baseInfo); err != nil { 1860 return err 1861 } 1862 1863 return nil 1864 } 1865 1866 func (devices *DeviceSet) markForDeferredDeletion(info *devInfo) error { 1867 // If device is already in deleted state, there is nothing to be done. 
1868 if info.Deleted { 1869 return nil 1870 } 1871 1872 logrus.Debugf("devmapper: Marking device %s for deferred deletion.", info.Hash) 1873 1874 info.Deleted = true 1875 1876 // save device metadata to reflect deleted state. 1877 if err := devices.saveMetadata(info); err != nil { 1878 info.Deleted = false 1879 return err 1880 } 1881 1882 devices.nrDeletedDevices++ 1883 return nil 1884 } 1885 1886 // Should be called with devices.Lock() held. 1887 func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) error { 1888 if err := devices.openTransaction(info.Hash, info.DeviceID); err != nil { 1889 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceId = %d", "", info.DeviceID) 1890 return err 1891 } 1892 1893 defer devices.closeTransaction() 1894 1895 err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID) 1896 if err != nil { 1897 // If syncDelete is true, we want to return error. If deferred 1898 // deletion is not enabled, we return an error. If error is 1899 // something other then EBUSY, return an error. 1900 if syncDelete || !devices.deferredDelete || err != devicemapper.ErrBusy { 1901 logrus.Debugf("devmapper: Error deleting device: %s", err) 1902 return err 1903 } 1904 } 1905 1906 if err == nil { 1907 if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil { 1908 return err 1909 } 1910 // If device was already in deferred delete state that means 1911 // deletion was being tried again later. Reduce the deleted 1912 // device count. 1913 if info.Deleted { 1914 devices.nrDeletedDevices-- 1915 } 1916 devices.markDeviceIDFree(info.DeviceID) 1917 } else { 1918 if err := devices.markForDeferredDeletion(info); err != nil { 1919 return err 1920 } 1921 } 1922 1923 return nil 1924 } 1925 1926 // Issue discard only if device open count is zero. 1927 func (devices *DeviceSet) issueDiscard(info *devInfo) error { 1928 logrus.Debugf("devmapper: issueDiscard(device: %s). 
START", info.Hash) 1929 defer logrus.Debugf("devmapper: issueDiscard(device: %s). END", info.Hash) 1930 // This is a workaround for the kernel not discarding block so 1931 // on the thin pool when we remove a thinp device, so we do it 1932 // manually. 1933 // Even if device is deferred deleted, activate it and issue 1934 // discards. 1935 if err := devices.activateDeviceIfNeeded(info, true); err != nil { 1936 return err 1937 } 1938 1939 devinfo, err := devicemapper.GetInfo(info.Name()) 1940 if err != nil { 1941 return err 1942 } 1943 1944 if devinfo.OpenCount != 0 { 1945 logrus.Debugf("devmapper: Device: %s is in use. OpenCount=%d. Not issuing discards.", info.Hash, devinfo.OpenCount) 1946 return nil 1947 } 1948 1949 if err := devicemapper.BlockDeviceDiscard(info.DevName()); err != nil { 1950 logrus.Debugf("devmapper: Error discarding block on device: %s (ignoring)", err) 1951 } 1952 return nil 1953 } 1954 1955 // Should be called with devices.Lock() held. 1956 func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error { 1957 if devices.doBlkDiscard { 1958 devices.issueDiscard(info) 1959 } 1960 1961 // Try to deactivate device in case it is active. 1962 if err := devices.deactivateDevice(info); err != nil { 1963 logrus.Debugf("devmapper: Error deactivating device: %s", err) 1964 return err 1965 } 1966 1967 if err := devices.deleteTransaction(info, syncDelete); err != nil { 1968 return err 1969 } 1970 1971 return nil 1972 } 1973 1974 // DeleteDevice will return success if device has been marked for deferred 1975 // removal. If one wants to override that and want DeleteDevice() to fail if 1976 // device was busy and could not be deleted, set syncDelete=true. 
1977 func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error { 1978 logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) START", hash, syncDelete) 1979 defer logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) END", hash, syncDelete) 1980 info, err := devices.lookupDeviceWithLock(hash) 1981 if err != nil { 1982 return err 1983 } 1984 1985 info.lock.Lock() 1986 defer info.lock.Unlock() 1987 1988 devices.Lock() 1989 defer devices.Unlock() 1990 1991 return devices.deleteDevice(info, syncDelete) 1992 } 1993 1994 func (devices *DeviceSet) deactivatePool() error { 1995 logrus.Debugf("devmapper: deactivatePool()") 1996 defer logrus.Debugf("devmapper: deactivatePool END") 1997 devname := devices.getPoolDevName() 1998 1999 devinfo, err := devicemapper.GetInfo(devname) 2000 if err != nil { 2001 return err 2002 } 2003 2004 if devinfo.Exists == 0 { 2005 return nil 2006 } 2007 if err := devicemapper.RemoveDevice(devname); err != nil { 2008 return err 2009 } 2010 2011 if d, err := devicemapper.GetDeps(devname); err == nil { 2012 logrus.Warnf("devmapper: device %s still has %d active dependents", devname, d.Count) 2013 } 2014 2015 return nil 2016 } 2017 2018 func (devices *DeviceSet) deactivateDevice(info *devInfo) error { 2019 logrus.Debugf("devmapper: deactivateDevice(%s)", info.Hash) 2020 defer logrus.Debugf("devmapper: deactivateDevice END(%s)", info.Hash) 2021 2022 devinfo, err := devicemapper.GetInfo(info.Name()) 2023 if err != nil { 2024 return err 2025 } 2026 2027 if devinfo.Exists == 0 { 2028 return nil 2029 } 2030 2031 if devices.deferredRemove { 2032 if err := devicemapper.RemoveDeviceDeferred(info.Name()); err != nil { 2033 return err 2034 } 2035 } else { 2036 if err := devices.removeDevice(info.Name()); err != nil { 2037 return err 2038 } 2039 } 2040 return nil 2041 } 2042 2043 // Issues the underlying dm remove operation. 
2044 func (devices *DeviceSet) removeDevice(devname string) error { 2045 var err error 2046 2047 logrus.Debugf("devmapper: removeDevice START(%s)", devname) 2048 defer logrus.Debugf("devmapper: removeDevice END(%s)", devname) 2049 2050 for i := 0; i < 200; i++ { 2051 err = devicemapper.RemoveDevice(devname) 2052 if err == nil { 2053 break 2054 } 2055 if err != devicemapper.ErrBusy { 2056 return err 2057 } 2058 2059 // If we see EBUSY it may be a transient error, 2060 // sleep a bit a retry a few times. 2061 devices.Unlock() 2062 time.Sleep(100 * time.Millisecond) 2063 devices.Lock() 2064 } 2065 2066 return err 2067 } 2068 2069 func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error { 2070 if !devices.deferredRemove { 2071 return nil 2072 } 2073 2074 logrus.Debugf("devmapper: cancelDeferredRemoval START(%s)", info.Name()) 2075 defer logrus.Debugf("devmapper: cancelDeferredRemoval END(%s)", info.Name()) 2076 2077 devinfo, err := devicemapper.GetInfoWithDeferred(info.Name()) 2078 2079 if devinfo != nil && devinfo.DeferredRemove == 0 { 2080 return nil 2081 } 2082 2083 // Cancel deferred remove 2084 for i := 0; i < 100; i++ { 2085 err = devicemapper.CancelDeferredRemove(info.Name()) 2086 if err == nil { 2087 break 2088 } 2089 2090 if err == devicemapper.ErrEnxio { 2091 // Device is probably already gone. Return success. 2092 return nil 2093 } 2094 2095 if err != devicemapper.ErrBusy { 2096 return err 2097 } 2098 2099 // If we see EBUSY it may be a transient error, 2100 // sleep a bit a retry a few times. 2101 devices.Unlock() 2102 time.Sleep(100 * time.Millisecond) 2103 devices.Lock() 2104 } 2105 return err 2106 } 2107 2108 // Shutdown shuts down the device by unmounting the root. 
2109 func (devices *DeviceSet) Shutdown(home string) error { 2110 logrus.Debugf("devmapper: [deviceset %s] Shutdown()", devices.devicePrefix) 2111 logrus.Debugf("devmapper: Shutting down DeviceSet: %s", devices.root) 2112 defer logrus.Debugf("devmapper: [deviceset %s] Shutdown() END", devices.devicePrefix) 2113 2114 // Stop deletion worker. This should start delivering new events to 2115 // ticker channel. That means no new instance of cleanupDeletedDevice() 2116 // will run after this call. If one instance is already running at 2117 // the time of the call, it must be holding devices.Lock() and 2118 // we will block on this lock till cleanup function exits. 2119 devices.deletionWorkerTicker.Stop() 2120 2121 devices.Lock() 2122 // Save DeviceSet Metadata first. Docker kills all threads if they 2123 // don't finish in certain time. It is possible that Shutdown() 2124 // routine does not finish in time as we loop trying to deactivate 2125 // some devices while these are busy. In that case shutdown() routine 2126 // will be killed and we will not get a chance to save deviceset 2127 // metadata. Hence save this early before trying to deactivate devices. 2128 devices.saveDeviceSetMetaData() 2129 2130 // ignore the error since it's just a best effort to not try to unmount something that's mounted 2131 mounts, _ := mount.GetMounts() 2132 mounted := make(map[string]bool, len(mounts)) 2133 for _, mnt := range mounts { 2134 mounted[mnt.Mountpoint] = true 2135 } 2136 2137 if err := filepath.Walk(path.Join(home, "mnt"), func(p string, info os.FileInfo, err error) error { 2138 if err != nil { 2139 return err 2140 } 2141 if !info.IsDir() { 2142 return nil 2143 } 2144 2145 if mounted[p] { 2146 // We use MNT_DETACH here in case it is still busy in some running 2147 // container. This means it'll go away from the global scope directly, 2148 // and the device will be released when that container dies. 
2149 if err := syscall.Unmount(p, syscall.MNT_DETACH); err != nil { 2150 logrus.Debugf("devmapper: Shutdown unmounting %s, error: %s", p, err) 2151 } 2152 } 2153 2154 if devInfo, err := devices.lookupDevice(path.Base(p)); err != nil { 2155 logrus.Debugf("devmapper: Shutdown lookup device %s, error: %s", path.Base(p), err) 2156 } else { 2157 if err := devices.deactivateDevice(devInfo); err != nil { 2158 logrus.Debugf("devmapper: Shutdown deactivate %s , error: %s", devInfo.Hash, err) 2159 } 2160 } 2161 2162 return nil 2163 }); err != nil && !os.IsNotExist(err) { 2164 devices.Unlock() 2165 return err 2166 } 2167 2168 devices.Unlock() 2169 2170 info, _ := devices.lookupDeviceWithLock("") 2171 if info != nil { 2172 info.lock.Lock() 2173 devices.Lock() 2174 if err := devices.deactivateDevice(info); err != nil { 2175 logrus.Debugf("devmapper: Shutdown deactivate base , error: %s", err) 2176 } 2177 devices.Unlock() 2178 info.lock.Unlock() 2179 } 2180 2181 devices.Lock() 2182 if devices.thinPoolDevice == "" { 2183 if err := devices.deactivatePool(); err != nil { 2184 logrus.Debugf("devmapper: Shutdown deactivate pool , error: %s", err) 2185 } 2186 } 2187 devices.Unlock() 2188 2189 return nil 2190 } 2191 2192 // MountDevice mounts the device if not already mounted. 
2193 func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error { 2194 info, err := devices.lookupDeviceWithLock(hash) 2195 if err != nil { 2196 return err 2197 } 2198 2199 if info.Deleted { 2200 return fmt.Errorf("devmapper: Can't mount device %v as it has been marked for deferred deletion", info.Hash) 2201 } 2202 2203 info.lock.Lock() 2204 defer info.lock.Unlock() 2205 2206 devices.Lock() 2207 defer devices.Unlock() 2208 2209 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2210 return fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2211 } 2212 2213 fstype, err := ProbeFsType(info.DevName()) 2214 if err != nil { 2215 return err 2216 } 2217 2218 options := "" 2219 2220 if fstype == "xfs" { 2221 // XFS needs nouuid or it can't mount filesystems with the same fs 2222 options = joinMountOptions(options, "nouuid") 2223 } 2224 2225 options = joinMountOptions(options, devices.mountOptions) 2226 options = joinMountOptions(options, label.FormatMountLabel("", mountLabel)) 2227 2228 if err := mount.Mount(info.DevName(), path, fstype, options); err != nil { 2229 return fmt.Errorf("devmapper: Error mounting '%s' on '%s': %s", info.DevName(), path, err) 2230 } 2231 2232 return nil 2233 } 2234 2235 // UnmountDevice unmounts the device and removes it from hash. 
2236 func (devices *DeviceSet) UnmountDevice(hash, mountPath string) error { 2237 logrus.Debugf("devmapper: UnmountDevice(hash=%s)", hash) 2238 defer logrus.Debugf("devmapper: UnmountDevice(hash=%s) END", hash) 2239 2240 info, err := devices.lookupDeviceWithLock(hash) 2241 if err != nil { 2242 return err 2243 } 2244 2245 info.lock.Lock() 2246 defer info.lock.Unlock() 2247 2248 devices.Lock() 2249 defer devices.Unlock() 2250 2251 logrus.Debugf("devmapper: Unmount(%s)", mountPath) 2252 if err := syscall.Unmount(mountPath, syscall.MNT_DETACH); err != nil { 2253 return err 2254 } 2255 logrus.Debugf("devmapper: Unmount done") 2256 2257 if err := devices.deactivateDevice(info); err != nil { 2258 return err 2259 } 2260 2261 return nil 2262 } 2263 2264 // HasDevice returns true if the device metadata exists. 2265 func (devices *DeviceSet) HasDevice(hash string) bool { 2266 info, _ := devices.lookupDeviceWithLock(hash) 2267 return info != nil 2268 } 2269 2270 // List returns a list of device ids. 
2271 func (devices *DeviceSet) List() []string { 2272 devices.Lock() 2273 defer devices.Unlock() 2274 2275 ids := make([]string, len(devices.Devices)) 2276 i := 0 2277 for k := range devices.Devices { 2278 ids[i] = k 2279 i++ 2280 } 2281 return ids 2282 } 2283 2284 func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) { 2285 var params string 2286 _, sizeInSectors, _, params, err = devicemapper.GetStatus(devName) 2287 if err != nil { 2288 return 2289 } 2290 if _, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector); err == nil { 2291 return 2292 } 2293 return 2294 } 2295 2296 // GetDeviceStatus provides size, mapped sectors 2297 func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) { 2298 info, err := devices.lookupDeviceWithLock(hash) 2299 if err != nil { 2300 return nil, err 2301 } 2302 2303 info.lock.Lock() 2304 defer info.lock.Unlock() 2305 2306 devices.Lock() 2307 defer devices.Unlock() 2308 2309 status := &DevStatus{ 2310 DeviceID: info.DeviceID, 2311 Size: info.Size, 2312 TransactionID: info.TransactionID, 2313 } 2314 2315 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2316 return nil, fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2317 } 2318 2319 sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName()) 2320 2321 if err != nil { 2322 return nil, err 2323 } 2324 2325 status.SizeInSectors = sizeInSectors 2326 status.MappedSectors = mappedSectors 2327 status.HighestMappedSector = highestMappedSector 2328 2329 return status, nil 2330 } 2331 2332 func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionID, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) { 2333 var params string 2334 if _, totalSizeInSectors, _, params, err = devicemapper.GetStatus(devices.getPoolName()); err == nil { 2335 _, err = fmt.Sscanf(params, "%d %d/%d 
%d/%d", &transactionID, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal) 2336 } 2337 return 2338 } 2339 2340 // DataDevicePath returns the path to the data storage for this deviceset, 2341 // regardless of loopback or block device 2342 func (devices *DeviceSet) DataDevicePath() string { 2343 return devices.dataDevice 2344 } 2345 2346 // MetadataDevicePath returns the path to the metadata storage for this deviceset, 2347 // regardless of loopback or block device 2348 func (devices *DeviceSet) MetadataDevicePath() string { 2349 return devices.metadataDevice 2350 } 2351 2352 func (devices *DeviceSet) getUnderlyingAvailableSpace(loopFile string) (uint64, error) { 2353 buf := new(syscall.Statfs_t) 2354 if err := syscall.Statfs(loopFile, buf); err != nil { 2355 logrus.Warnf("devmapper: Couldn't stat loopfile filesystem %v: %v", loopFile, err) 2356 return 0, err 2357 } 2358 return buf.Bfree * uint64(buf.Bsize), nil 2359 } 2360 2361 func (devices *DeviceSet) isRealFile(loopFile string) (bool, error) { 2362 if loopFile != "" { 2363 fi, err := os.Stat(loopFile) 2364 if err != nil { 2365 logrus.Warnf("devmapper: Couldn't stat loopfile %v: %v", loopFile, err) 2366 return false, err 2367 } 2368 return fi.Mode().IsRegular(), nil 2369 } 2370 return false, nil 2371 } 2372 2373 // Status returns the current status of this deviceset 2374 func (devices *DeviceSet) Status() *Status { 2375 devices.Lock() 2376 defer devices.Unlock() 2377 2378 status := &Status{} 2379 2380 status.PoolName = devices.getPoolName() 2381 status.DataFile = devices.DataDevicePath() 2382 status.DataLoopback = devices.dataLoopFile 2383 status.MetadataFile = devices.MetadataDevicePath() 2384 status.MetadataLoopback = devices.metadataLoopFile 2385 status.UdevSyncSupported = devicemapper.UdevSyncSupported() 2386 status.DeferredRemoveEnabled = devices.deferredRemove 2387 status.DeferredDeleteEnabled = devices.deferredDelete 2388 status.DeferredDeletedDeviceCount = devices.nrDeletedDevices 2389 
status.BaseDeviceSize = devices.getBaseDeviceSize() 2390 status.BaseDeviceFS = devices.getBaseDeviceFS() 2391 2392 totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 2393 if err == nil { 2394 // Convert from blocks to bytes 2395 blockSizeInSectors := totalSizeInSectors / dataTotal 2396 2397 status.Data.Used = dataUsed * blockSizeInSectors * 512 2398 status.Data.Total = dataTotal * blockSizeInSectors * 512 2399 status.Data.Available = status.Data.Total - status.Data.Used 2400 2401 // metadata blocks are always 4k 2402 status.Metadata.Used = metadataUsed * 4096 2403 status.Metadata.Total = metadataTotal * 4096 2404 status.Metadata.Available = status.Metadata.Total - status.Metadata.Used 2405 2406 status.SectorSize = blockSizeInSectors * 512 2407 2408 if check, _ := devices.isRealFile(devices.dataLoopFile); check { 2409 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.dataLoopFile) 2410 if err == nil && actualSpace < status.Data.Available { 2411 status.Data.Available = actualSpace 2412 } 2413 } 2414 2415 if check, _ := devices.isRealFile(devices.metadataLoopFile); check { 2416 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.metadataLoopFile) 2417 if err == nil && actualSpace < status.Metadata.Available { 2418 status.Metadata.Available = actualSpace 2419 } 2420 } 2421 } 2422 2423 return status 2424 } 2425 2426 // Status returns the current status of this deviceset 2427 func (devices *DeviceSet) exportDeviceMetadata(hash string) (*deviceMetadata, error) { 2428 info, err := devices.lookupDeviceWithLock(hash) 2429 if err != nil { 2430 return nil, err 2431 } 2432 2433 info.lock.Lock() 2434 defer info.lock.Unlock() 2435 2436 metadata := &deviceMetadata{info.DeviceID, info.Size, info.Name()} 2437 return metadata, nil 2438 } 2439 2440 // NewDeviceSet creates the device set based on the options provided. 
2441 func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps []idtools.IDMap) (*DeviceSet, error) { 2442 devicemapper.SetDevDir("/dev") 2443 2444 devices := &DeviceSet{ 2445 root: root, 2446 metaData: metaData{Devices: make(map[string]*devInfo)}, 2447 dataLoopbackSize: defaultDataLoopbackSize, 2448 metaDataLoopbackSize: defaultMetaDataLoopbackSize, 2449 baseFsSize: defaultBaseFsSize, 2450 overrideUdevSyncCheck: defaultUdevSyncOverride, 2451 doBlkDiscard: true, 2452 thinpBlockSize: defaultThinpBlockSize, 2453 deviceIDMap: make([]byte, deviceIDMapSz), 2454 deletionWorkerTicker: time.NewTicker(time.Second * 30), 2455 uidMaps: uidMaps, 2456 gidMaps: gidMaps, 2457 minFreeSpacePercent: defaultMinFreeSpacePercent, 2458 } 2459 2460 foundBlkDiscard := false 2461 for _, option := range options { 2462 key, val, err := parsers.ParseKeyValueOpt(option) 2463 if err != nil { 2464 return nil, err 2465 } 2466 key = strings.ToLower(key) 2467 switch key { 2468 case "dm.basesize": 2469 size, err := units.RAMInBytes(val) 2470 if err != nil { 2471 return nil, err 2472 } 2473 userBaseSize = true 2474 devices.baseFsSize = uint64(size) 2475 case "dm.loopdatasize": 2476 size, err := units.RAMInBytes(val) 2477 if err != nil { 2478 return nil, err 2479 } 2480 devices.dataLoopbackSize = size 2481 case "dm.loopmetadatasize": 2482 size, err := units.RAMInBytes(val) 2483 if err != nil { 2484 return nil, err 2485 } 2486 devices.metaDataLoopbackSize = size 2487 case "dm.fs": 2488 if val != "ext4" && val != "xfs" { 2489 return nil, fmt.Errorf("devmapper: Unsupported filesystem %s\n", val) 2490 } 2491 devices.filesystem = val 2492 case "dm.mkfsarg": 2493 devices.mkfsArgs = append(devices.mkfsArgs, val) 2494 case "dm.mountopt": 2495 devices.mountOptions = joinMountOptions(devices.mountOptions, val) 2496 case "dm.metadatadev": 2497 devices.metadataDevice = val 2498 case "dm.datadev": 2499 devices.dataDevice = val 2500 case "dm.thinpooldev": 2501 devices.thinPoolDevice = 
strings.TrimPrefix(val, "/dev/mapper/") 2502 case "dm.blkdiscard": 2503 foundBlkDiscard = true 2504 devices.doBlkDiscard, err = strconv.ParseBool(val) 2505 if err != nil { 2506 return nil, err 2507 } 2508 case "dm.blocksize": 2509 size, err := units.RAMInBytes(val) 2510 if err != nil { 2511 return nil, err 2512 } 2513 // convert to 512b sectors 2514 devices.thinpBlockSize = uint32(size) >> 9 2515 case "dm.override_udev_sync_check": 2516 devices.overrideUdevSyncCheck, err = strconv.ParseBool(val) 2517 if err != nil { 2518 return nil, err 2519 } 2520 2521 case "dm.use_deferred_removal": 2522 enableDeferredRemoval, err = strconv.ParseBool(val) 2523 if err != nil { 2524 return nil, err 2525 } 2526 2527 case "dm.use_deferred_deletion": 2528 enableDeferredDeletion, err = strconv.ParseBool(val) 2529 if err != nil { 2530 return nil, err 2531 } 2532 2533 case "dm.min_free_space": 2534 if !strings.HasSuffix(val, "%") { 2535 return nil, fmt.Errorf("devmapper: Option dm.min_free_space requires %% suffix") 2536 } 2537 2538 valstring := strings.TrimSuffix(val, "%") 2539 minFreeSpacePercent, err := strconv.ParseUint(valstring, 10, 32) 2540 if err != nil { 2541 return nil, err 2542 } 2543 2544 if minFreeSpacePercent >= 100 { 2545 return nil, fmt.Errorf("devmapper: Invalid value %v for option dm.min_free_space", val) 2546 } 2547 2548 devices.minFreeSpacePercent = uint32(minFreeSpacePercent) 2549 default: 2550 return nil, fmt.Errorf("devmapper: Unknown option %s\n", key) 2551 } 2552 } 2553 2554 // By default, don't do blk discard hack on raw devices, its rarely useful and is expensive 2555 if !foundBlkDiscard && (devices.dataDevice != "" || devices.thinPoolDevice != "") { 2556 devices.doBlkDiscard = false 2557 } 2558 2559 if err := devices.initDevmapper(doInit); err != nil { 2560 return nil, err 2561 } 2562 2563 return devices, nil 2564 }