// Source listing: github.com/dpiddy/docker@v1.12.2-rc1/daemon/graphdriver/devmapper/deviceset.go

// +build linux

package devmapper

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Sirupsen/logrus"

	"github.com/docker/docker/daemon/graphdriver"
	"github.com/docker/docker/dockerversion"
	"github.com/docker/docker/pkg/devicemapper"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/loopback"
	"github.com/docker/docker/pkg/mount"
	"github.com/docker/docker/pkg/parsers"
	"github.com/docker/go-units"

	"github.com/opencontainers/runc/libcontainer/label"
)

var (
	defaultDataLoopbackSize     int64  = 100 * 1024 * 1024 * 1024
	defaultMetaDataLoopbackSize int64  = 2 * 1024 * 1024 * 1024
	defaultBaseFsSize           uint64 = 10 * 1024 * 1024 * 1024
	defaultThinpBlockSize       uint32 = 128 // 64K = 128 512b sectors
	defaultUdevSyncOverride            = false
	maxDeviceID                        = 0xffffff // 24 bit, pool limit
	deviceIDMapSz                      = (maxDeviceID + 1) / 8
	// We retry device removal so many times that even error messages
	// would fill up the console during normal operation. So only log
	// Fatal messages by default.
	logLevel                            = devicemapper.LogLevelFatal
	driverDeferredRemovalSupport        = false
	enableDeferredRemoval               = false
	enableDeferredDeletion              = false
	userBaseSize                        = false
	defaultMinFreeSpacePercent   uint32 = 10
)

// Names of the bookkeeping files kept in the metadata directory next to the
// per-device metadata files.
const deviceSetMetaFile string = "deviceset-metadata"
const transactionMetaFile string = "transaction-metadata"

// transaction is the JSON-serialisable record of an open transaction: its ID
// and the device (hash and device ID) it concerns.
// NOTE(review): presumably persisted to the transaction-metadata file —
// confirm against openTransaction/closeTransaction.
type transaction struct {
	OpenTransactionID uint64 `json:"open_transaction_id"`
	DeviceIDHash      string `json:"device_hash"`
	DeviceID          int    `json:"device_id"`
}

// devInfo is the per-device record. Everything except Hash and the
// unexported fields is serialised to the device's metadata file; Hash is
// carried by the file name instead (see metadataFile).
type devInfo struct {
	Hash          string `json:"-"`
	DeviceID      int    `json:"device_id"`
	Size          uint64 `json:"size"`
	TransactionID uint64 `json:"transaction_id"`
	Initialized   bool   `json:"initialized"`
	Deleted       bool   `json:"deleted"`
	devices       *DeviceSet

	// The global DeviceSet lock guarantees that we serialize all
	// the calls to libdevmapper (which is not threadsafe), but we
	// sometimes release that lock while sleeping. In that case
	// this per-device lock is still held, protecting against
	// other accesses to the device that we're doing the wait on.
	//
	// WARNING: In order to avoid AB-BA deadlocks when releasing
	// the global lock while holding the per-device locks, all
	// device locks must be acquired *before* the global lock, and
	// multiple device locks should be acquired parent before child.
	lock sync.Mutex
}

// metaData is the in-memory table of known devices, keyed by device hash
// (the base device uses the empty hash).
type metaData struct {
	Devices map[string]*devInfo `json:"Devices"`
}

// DeviceSet holds information about list of devices
type DeviceSet struct {
	metaData      `json:"-"`
	sync.Mutex    `json:"-"` // Protects all fields of DeviceSet and serializes calls into libdevmapper
	root          string
	devicePrefix  string
	TransactionID uint64 `json:"-"`
	NextDeviceID  int    `json:"next_device_id"`
	deviceIDMap   []byte

	// Options
	dataLoopbackSize      int64
	metaDataLoopbackSize  int64
	baseFsSize            uint64
	filesystem            string
	mountOptions          string
	mkfsArgs              []string
	dataDevice            string // block or loop dev
	dataLoopFile          string // loopback file, if used
	metadataDevice        string // block or loop dev
	metadataLoopFile      string // loopback file, if used
	doBlkDiscard          bool
	thinpBlockSize        uint32
	thinPoolDevice        string
	transaction           `json:"-"`
	overrideUdevSyncCheck bool
	deferredRemove        bool   // use deferred removal
	deferredDelete        bool   // use deferred deletion
	BaseDeviceUUID        string // save UUID of base device
	BaseDeviceFilesystem  string // save filesystem of base device
	nrDeletedDevices      uint   // number of deleted devices
	deletionWorkerTicker  *time.Ticker
	uidMaps               []idtools.IDMap
	gidMaps               []idtools.IDMap
	minFreeSpacePercent   uint32 //min free space percentage in thinpool
}

// DiskUsage contains information about disk usage and is used when reporting Status of a device.
type DiskUsage struct {
	// Used bytes on the disk.
	Used uint64
	// Total bytes on the disk.
	Total uint64
	// Available bytes on the disk.
	Available uint64
}

// Status returns the information about the device.
type Status struct {
	// PoolName is the name of the data pool.
	PoolName string
	// DataFile is the actual block device for data.
	DataFile string
	// DataLoopback is the loopback file, if used.
	DataLoopback string
	// MetadataFile is the actual block device for metadata.
	MetadataFile string
	// MetadataLoopback is the loopback file, if used.
	MetadataLoopback string
	// Data is the disk used for data.
	Data DiskUsage
	// Metadata is the disk used for meta data.
	Metadata DiskUsage
	// BaseDeviceSize is base size of container and image
	BaseDeviceSize uint64
	// BaseDeviceFS is backing filesystem.
	BaseDeviceFS string
	// SectorSize is the size of a sector.
	SectorSize uint64
	// UdevSyncSupported is true if sync is supported.
	UdevSyncSupported bool
	// DeferredRemoveEnabled is true if deferred removal is enabled.
	DeferredRemoveEnabled bool
	// True if deferred deletion is enabled. This is different from
	// deferred removal. "removal" means that device mapper device is
	// deactivated. Thin device is still in thin pool and can be activated
	// again. But "deletion" means that thin device will be deleted from
	// thin pool and it can't be activated again.
	DeferredDeleteEnabled      bool
	DeferredDeletedDeviceCount uint
	MinFreeSpace               uint64
}

// Structure used to export image/container metadata in docker inspect.
type deviceMetadata struct {
	deviceID   int
	deviceSize uint64 // size in bytes
	deviceName string // Device name as used during activation
}

// DevStatus returns information about device mounted containing its id, size and sector information.
type DevStatus struct {
	// DeviceID is the id of the device.
	DeviceID int
	// Size is the size of the filesystem.
	Size uint64
	// TransactionID is a unique integer per device set used to identify an operation on the file system, this number is incremental.
	TransactionID uint64
	// SizeInSectors indicates the size of the sectors allocated.
	SizeInSectors uint64
	// MappedSectors indicates number of mapped sectors.
	MappedSectors uint64
	// HighestMappedSector is the pointer to the highest mapped sector.
	HighestMappedSector uint64
}

// getDevName returns the /dev/mapper path for a device-mapper device name.
func getDevName(name string) string {
	return "/dev/mapper/" + name
}

// Name returns the device-mapper name of the device: the set's device
// prefix, a dash, and the device hash ("base" when the hash is empty).
func (info *devInfo) Name() string {
	hash := info.Hash
	if hash == "" {
		hash = "base"
	}
	return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash)
}

// DevName returns the /dev/mapper path of the device.
func (info *devInfo) DevName() string {
	return getDevName(info.Name())
}

// loopbackDir returns <root>/devicemapper, the directory that holds the
// loopback image files.
func (devices *DeviceSet) loopbackDir() string {
	return path.Join(devices.root, "devicemapper")
}

// metadataDir returns <root>/metadata, the directory that holds the
// per-device and device-set metadata files.
func (devices *DeviceSet) metadataDir() string {
	return path.Join(devices.root, "metadata")
}

// metadataFile returns the path of the metadata file for info. The file is
// named after the device hash, or "base" when the hash is empty.
func (devices *DeviceSet) metadataFile(info *devInfo) string {
	file := info.Hash
	if file == "" {
		file = "base"
	}
	return path.Join(devices.metadataDir(), file)
}

// transactionMetaFile returns the path of the transaction metadata file.
func (devices *DeviceSet) transactionMetaFile() string {
	return path.Join(devices.metadataDir(), transactionMetaFile)
}

// deviceSetMetaFile returns the path of the device-set metadata file.
func (devices *DeviceSet) deviceSetMetaFile() string {
	return path.Join(devices.metadataDir(), deviceSetMetaFile)
}

// oldMetadataFile returns the path of the legacy single-file metadata
// ("json" inside the loopback directory) read by migrateOldMetaData.
func (devices *DeviceSet) oldMetadataFile() string {
	return path.Join(devices.loopbackDir(), "json")
}

// getPoolName returns the configured thin-pool device name, or
// "<devicePrefix>-pool" when none was configured.
func (devices *DeviceSet) getPoolName() string {
	if devices.thinPoolDevice == "" {
		return devices.devicePrefix + "-pool"
	}
	return devices.thinPoolDevice
}

// getPoolDevName returns the /dev/mapper path of the thin pool.
func (devices *DeviceSet) getPoolDevName() string {
	return getDevName(devices.getPoolName())
}

// hasImage reports whether a file called name can be stat'ed inside the
// loopback directory. Any Stat error, not only "does not exist", yields
// false.
func (devices *DeviceSet) hasImage(name string) bool {
	dirname := devices.loopbackDir()
	filename := path.Join(dirname, name)

	_, err := os.Stat(filename)
	return err == nil
}

// ensureImage creates a sparse file of <size> bytes at the path
// <root>/devicemapper/<name>.
261 // If the file already exists and new size is larger than its current size, it grows to the new size. 262 // Either way it returns the full path. 263 func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) { 264 dirname := devices.loopbackDir() 265 filename := path.Join(dirname, name) 266 267 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 268 if err != nil { 269 return "", err 270 } 271 if err := idtools.MkdirAllAs(dirname, 0700, uid, gid); err != nil && !os.IsExist(err) { 272 return "", err 273 } 274 275 if fi, err := os.Stat(filename); err != nil { 276 if !os.IsNotExist(err) { 277 return "", err 278 } 279 logrus.Debugf("devmapper: Creating loopback file %s for device-manage use", filename) 280 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 281 if err != nil { 282 return "", err 283 } 284 defer file.Close() 285 286 if err := file.Truncate(size); err != nil { 287 return "", err 288 } 289 } else { 290 if fi.Size() < size { 291 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 292 if err != nil { 293 return "", err 294 } 295 defer file.Close() 296 if err := file.Truncate(size); err != nil { 297 return "", fmt.Errorf("devmapper: Unable to grow loopback file %s: %v", filename, err) 298 } 299 } else if fi.Size() > size { 300 logrus.Warnf("devmapper: Can't shrink loopback file %s", filename) 301 } 302 } 303 return filename, nil 304 } 305 306 func (devices *DeviceSet) allocateTransactionID() uint64 { 307 devices.OpenTransactionID = devices.TransactionID + 1 308 return devices.OpenTransactionID 309 } 310 311 func (devices *DeviceSet) updatePoolTransactionID() error { 312 if err := devicemapper.SetTransactionID(devices.getPoolDevName(), devices.TransactionID, devices.OpenTransactionID); err != nil { 313 return fmt.Errorf("devmapper: Error setting devmapper transaction ID: %s", err) 314 } 315 devices.TransactionID = devices.OpenTransactionID 316 return nil 317 } 318 319 func (devices 
*DeviceSet) removeMetadata(info *devInfo) error { 320 if err := os.RemoveAll(devices.metadataFile(info)); err != nil { 321 return fmt.Errorf("devmapper: Error removing metadata file %s: %s", devices.metadataFile(info), err) 322 } 323 return nil 324 } 325 326 // Given json data and file path, write it to disk 327 func (devices *DeviceSet) writeMetaFile(jsonData []byte, filePath string) error { 328 tmpFile, err := ioutil.TempFile(devices.metadataDir(), ".tmp") 329 if err != nil { 330 return fmt.Errorf("devmapper: Error creating metadata file: %s", err) 331 } 332 333 n, err := tmpFile.Write(jsonData) 334 if err != nil { 335 return fmt.Errorf("devmapper: Error writing metadata to %s: %s", tmpFile.Name(), err) 336 } 337 if n < len(jsonData) { 338 return io.ErrShortWrite 339 } 340 if err := tmpFile.Sync(); err != nil { 341 return fmt.Errorf("devmapper: Error syncing metadata file %s: %s", tmpFile.Name(), err) 342 } 343 if err := tmpFile.Close(); err != nil { 344 return fmt.Errorf("devmapper: Error closing metadata file %s: %s", tmpFile.Name(), err) 345 } 346 if err := os.Rename(tmpFile.Name(), filePath); err != nil { 347 return fmt.Errorf("devmapper: Error committing metadata file %s: %s", tmpFile.Name(), err) 348 } 349 350 return nil 351 } 352 353 func (devices *DeviceSet) saveMetadata(info *devInfo) error { 354 jsonData, err := json.Marshal(info) 355 if err != nil { 356 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 357 } 358 if err := devices.writeMetaFile(jsonData, devices.metadataFile(info)); err != nil { 359 return err 360 } 361 return nil 362 } 363 364 func (devices *DeviceSet) markDeviceIDUsed(deviceID int) { 365 var mask byte 366 i := deviceID % 8 367 mask = 1 << uint(i) 368 devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] | mask 369 } 370 371 func (devices *DeviceSet) markDeviceIDFree(deviceID int) { 372 var mask byte 373 i := deviceID % 8 374 mask = ^(1 << uint(i)) 375 devices.deviceIDMap[deviceID/8] = 
devices.deviceIDMap[deviceID/8] & mask 376 } 377 378 func (devices *DeviceSet) isDeviceIDFree(deviceID int) bool { 379 var mask byte 380 i := deviceID % 8 381 mask = (1 << uint(i)) 382 if (devices.deviceIDMap[deviceID/8] & mask) != 0 { 383 return false 384 } 385 return true 386 } 387 388 // Should be called with devices.Lock() held. 389 func (devices *DeviceSet) lookupDevice(hash string) (*devInfo, error) { 390 info := devices.Devices[hash] 391 if info == nil { 392 info = devices.loadMetadata(hash) 393 if info == nil { 394 return nil, fmt.Errorf("devmapper: Unknown device %s", hash) 395 } 396 397 devices.Devices[hash] = info 398 } 399 return info, nil 400 } 401 402 func (devices *DeviceSet) lookupDeviceWithLock(hash string) (*devInfo, error) { 403 devices.Lock() 404 defer devices.Unlock() 405 info, err := devices.lookupDevice(hash) 406 return info, err 407 } 408 409 // This function relies on that device hash map has been loaded in advance. 410 // Should be called with devices.Lock() held. 411 func (devices *DeviceSet) constructDeviceIDMap() { 412 logrus.Debugf("devmapper: constructDeviceIDMap()") 413 defer logrus.Debugf("devmapper: constructDeviceIDMap() END") 414 415 for _, info := range devices.Devices { 416 devices.markDeviceIDUsed(info.DeviceID) 417 logrus.Debugf("devmapper: Added deviceId=%d to DeviceIdMap", info.DeviceID) 418 } 419 } 420 421 func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo) error { 422 423 // Skip some of the meta files which are not device files. 
424 if strings.HasSuffix(finfo.Name(), ".migrated") { 425 logrus.Debugf("devmapper: Skipping file %s", path) 426 return nil 427 } 428 429 if strings.HasPrefix(finfo.Name(), ".") { 430 logrus.Debugf("devmapper: Skipping file %s", path) 431 return nil 432 } 433 434 if finfo.Name() == deviceSetMetaFile { 435 logrus.Debugf("devmapper: Skipping file %s", path) 436 return nil 437 } 438 439 if finfo.Name() == transactionMetaFile { 440 logrus.Debugf("devmapper: Skipping file %s", path) 441 return nil 442 } 443 444 logrus.Debugf("devmapper: Loading data for file %s", path) 445 446 hash := finfo.Name() 447 if hash == "base" { 448 hash = "" 449 } 450 451 // Include deleted devices also as cleanup delete device logic 452 // will go through it and see if there are any deleted devices. 453 if _, err := devices.lookupDevice(hash); err != nil { 454 return fmt.Errorf("devmapper: Error looking up device %s:%v", hash, err) 455 } 456 457 return nil 458 } 459 460 func (devices *DeviceSet) loadDeviceFilesOnStart() error { 461 logrus.Debugf("devmapper: loadDeviceFilesOnStart()") 462 defer logrus.Debugf("devmapper: loadDeviceFilesOnStart() END") 463 464 var scan = func(path string, info os.FileInfo, err error) error { 465 if err != nil { 466 logrus.Debugf("devmapper: Can't walk the file %s", path) 467 return nil 468 } 469 470 // Skip any directories 471 if info.IsDir() { 472 return nil 473 } 474 475 return devices.deviceFileWalkFunction(path, info) 476 } 477 478 return filepath.Walk(devices.metadataDir(), scan) 479 } 480 481 // Should be called with devices.Lock() held. 
482 func (devices *DeviceSet) unregisterDevice(id int, hash string) error { 483 logrus.Debugf("devmapper: unregisterDevice(%v, %v)", id, hash) 484 info := &devInfo{ 485 Hash: hash, 486 DeviceID: id, 487 } 488 489 delete(devices.Devices, hash) 490 491 if err := devices.removeMetadata(info); err != nil { 492 logrus.Debugf("devmapper: Error removing metadata: %s", err) 493 return err 494 } 495 496 return nil 497 } 498 499 // Should be called with devices.Lock() held. 500 func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, transactionID uint64) (*devInfo, error) { 501 logrus.Debugf("devmapper: registerDevice(%v, %v)", id, hash) 502 info := &devInfo{ 503 Hash: hash, 504 DeviceID: id, 505 Size: size, 506 TransactionID: transactionID, 507 Initialized: false, 508 devices: devices, 509 } 510 511 devices.Devices[hash] = info 512 513 if err := devices.saveMetadata(info); err != nil { 514 // Try to remove unused device 515 delete(devices.Devices, hash) 516 return nil, err 517 } 518 519 return info, nil 520 } 521 522 func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bool) error { 523 logrus.Debugf("devmapper: activateDeviceIfNeeded(%v)", info.Hash) 524 525 if info.Deleted && !ignoreDeleted { 526 return fmt.Errorf("devmapper: Can't activate device %v as it is marked for deletion", info.Hash) 527 } 528 529 // Make sure deferred removal on device is canceled, if one was 530 // scheduled. 
531 if err := devices.cancelDeferredRemoval(info); err != nil { 532 return fmt.Errorf("devmapper: Device Deferred Removal Cancellation Failed: %s", err) 533 } 534 535 if devinfo, _ := devicemapper.GetInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 { 536 return nil 537 } 538 539 return devicemapper.ActivateDevice(devices.getPoolDevName(), info.Name(), info.DeviceID, info.Size) 540 } 541 542 // Return true only if kernel supports xfs and mkfs.xfs is available 543 func xfsSupported() bool { 544 // Make sure mkfs.xfs is available 545 if _, err := exec.LookPath("mkfs.xfs"); err != nil { 546 return false 547 } 548 549 // Check if kernel supports xfs filesystem or not. 550 exec.Command("modprobe", "xfs").Run() 551 552 f, err := os.Open("/proc/filesystems") 553 if err != nil { 554 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 555 return false 556 } 557 defer f.Close() 558 559 s := bufio.NewScanner(f) 560 for s.Scan() { 561 if strings.HasSuffix(s.Text(), "\txfs") { 562 return true 563 } 564 } 565 566 if err := s.Err(); err != nil { 567 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 568 } 569 return false 570 } 571 572 func determineDefaultFS() string { 573 if xfsSupported() { 574 return "xfs" 575 } 576 577 logrus.Warn("devmapper: XFS is not supported in your system. Either the kernel doesn't support it or mkfs.xfs is not in your PATH. 
Defaulting to ext4 filesystem") 578 return "ext4" 579 } 580 581 func (devices *DeviceSet) createFilesystem(info *devInfo) (err error) { 582 devname := info.DevName() 583 584 args := []string{} 585 for _, arg := range devices.mkfsArgs { 586 args = append(args, arg) 587 } 588 589 args = append(args, devname) 590 591 if devices.filesystem == "" { 592 devices.filesystem = determineDefaultFS() 593 } 594 if err := devices.saveBaseDeviceFilesystem(devices.filesystem); err != nil { 595 return err 596 } 597 598 logrus.Infof("devmapper: Creating filesystem %s on device %s", devices.filesystem, info.Name()) 599 defer func() { 600 if err != nil { 601 logrus.Infof("devmapper: Error while creating filesystem %s on device %s: %v", devices.filesystem, info.Name(), err) 602 } else { 603 logrus.Infof("devmapper: Successfully created filesystem %s on device %s", devices.filesystem, info.Name()) 604 } 605 }() 606 607 switch devices.filesystem { 608 case "xfs": 609 err = exec.Command("mkfs.xfs", args...).Run() 610 case "ext4": 611 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0,lazy_journal_init=0"}, args...)...).Run() 612 if err != nil { 613 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0"}, args...)...).Run() 614 } 615 if err != nil { 616 return err 617 } 618 err = exec.Command("tune2fs", append([]string{"-c", "-1", "-i", "0"}, devname)...).Run() 619 default: 620 err = fmt.Errorf("devmapper: Unsupported filesystem type %s", devices.filesystem) 621 } 622 return 623 } 624 625 func (devices *DeviceSet) migrateOldMetaData() error { 626 // Migrate old metadata file 627 jsonData, err := ioutil.ReadFile(devices.oldMetadataFile()) 628 if err != nil && !os.IsNotExist(err) { 629 return err 630 } 631 632 if jsonData != nil { 633 m := metaData{Devices: make(map[string]*devInfo)} 634 635 if err := json.Unmarshal(jsonData, &m); err != nil { 636 return err 637 } 638 639 for hash, info := range m.Devices { 640 info.Hash = hash 
641 devices.saveMetadata(info) 642 } 643 if err := os.Rename(devices.oldMetadataFile(), devices.oldMetadataFile()+".migrated"); err != nil { 644 return err 645 } 646 647 } 648 649 return nil 650 } 651 652 // Cleanup deleted devices. It assumes that all the devices have been 653 // loaded in the hash table. 654 func (devices *DeviceSet) cleanupDeletedDevices() error { 655 devices.Lock() 656 657 // If there are no deleted devices, there is nothing to do. 658 if devices.nrDeletedDevices == 0 { 659 devices.Unlock() 660 return nil 661 } 662 663 var deletedDevices []*devInfo 664 665 for _, info := range devices.Devices { 666 if !info.Deleted { 667 continue 668 } 669 logrus.Debugf("devmapper: Found deleted device %s.", info.Hash) 670 deletedDevices = append(deletedDevices, info) 671 } 672 673 // Delete the deleted devices. DeleteDevice() first takes the info lock 674 // and then devices.Lock(). So drop it to avoid deadlock. 675 devices.Unlock() 676 677 for _, info := range deletedDevices { 678 // This will again try deferred deletion. 679 if err := devices.DeleteDevice(info.Hash, false); err != nil { 680 logrus.Warnf("devmapper: Deletion of device %s, device_id=%v failed:%v", info.Hash, info.DeviceID, err) 681 } 682 } 683 684 return nil 685 } 686 687 func (devices *DeviceSet) countDeletedDevices() { 688 for _, info := range devices.Devices { 689 if !info.Deleted { 690 continue 691 } 692 devices.nrDeletedDevices++ 693 } 694 } 695 696 func (devices *DeviceSet) startDeviceDeletionWorker() { 697 // Deferred deletion is not enabled. Don't do anything. 
698 if !devices.deferredDelete { 699 return 700 } 701 702 logrus.Debug("devmapper: Worker to cleanup deleted devices started") 703 for range devices.deletionWorkerTicker.C { 704 devices.cleanupDeletedDevices() 705 } 706 } 707 708 func (devices *DeviceSet) initMetaData() error { 709 devices.Lock() 710 defer devices.Unlock() 711 712 if err := devices.migrateOldMetaData(); err != nil { 713 return err 714 } 715 716 _, transactionID, _, _, _, _, err := devices.poolStatus() 717 if err != nil { 718 return err 719 } 720 721 devices.TransactionID = transactionID 722 723 if err := devices.loadDeviceFilesOnStart(); err != nil { 724 return fmt.Errorf("devmapper: Failed to load device files:%v", err) 725 } 726 727 devices.constructDeviceIDMap() 728 devices.countDeletedDevices() 729 730 if err := devices.processPendingTransaction(); err != nil { 731 return err 732 } 733 734 // Start a goroutine to cleanup Deleted Devices 735 go devices.startDeviceDeletionWorker() 736 return nil 737 } 738 739 func (devices *DeviceSet) incNextDeviceID() { 740 // IDs are 24bit, so wrap around 741 devices.NextDeviceID = (devices.NextDeviceID + 1) & maxDeviceID 742 } 743 744 func (devices *DeviceSet) getNextFreeDeviceID() (int, error) { 745 devices.incNextDeviceID() 746 for i := 0; i <= maxDeviceID; i++ { 747 if devices.isDeviceIDFree(devices.NextDeviceID) { 748 devices.markDeviceIDUsed(devices.NextDeviceID) 749 return devices.NextDeviceID, nil 750 } 751 devices.incNextDeviceID() 752 } 753 754 return 0, fmt.Errorf("devmapper: Unable to find a free device ID") 755 } 756 757 func (devices *DeviceSet) poolHasFreeSpace() error { 758 if devices.minFreeSpacePercent == 0 { 759 return nil 760 } 761 762 _, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 763 if err != nil { 764 return err 765 } 766 767 minFreeData := (dataTotal * uint64(devices.minFreeSpacePercent)) / 100 768 if minFreeData < 1 { 769 minFreeData = 1 770 } 771 dataFree := dataTotal - dataUsed 772 if dataFree < 
minFreeData { 773 return fmt.Errorf("devmapper: Thin Pool has %v free data blocks which is less than minimum required %v free data blocks. Create more free space in thin pool or use dm.min_free_space option to change behavior", (dataTotal - dataUsed), minFreeData) 774 } 775 776 minFreeMetadata := (metadataTotal * uint64(devices.minFreeSpacePercent)) / 100 777 if minFreeMetadata < 1 { 778 minFreeMetadata = 1 779 } 780 781 metadataFree := metadataTotal - metadataUsed 782 if metadataFree < minFreeMetadata { 783 return fmt.Errorf("devmapper: Thin Pool has %v free metadata blocks which is less than minimum required %v free metadata blocks. Create more free metadata space in thin pool or use dm.min_free_space option to change behavior", (metadataTotal - metadataUsed), minFreeMetadata) 784 } 785 786 return nil 787 } 788 789 func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) { 790 devices.Lock() 791 defer devices.Unlock() 792 793 deviceID, err := devices.getNextFreeDeviceID() 794 if err != nil { 795 return nil, err 796 } 797 798 if err := devices.openTransaction(hash, deviceID); err != nil { 799 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 800 devices.markDeviceIDFree(deviceID) 801 return nil, err 802 } 803 804 for { 805 if err := devicemapper.CreateDevice(devices.getPoolDevName(), deviceID); err != nil { 806 if devicemapper.DeviceIDExists(err) { 807 // Device ID already exists. This should not 808 // happen. Now we have a mechanism to find 809 // a free device ID. So something is not right. 810 // Give a warning and continue. 
811 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 812 deviceID, err = devices.getNextFreeDeviceID() 813 if err != nil { 814 return nil, err 815 } 816 // Save new device id into transaction 817 devices.refreshTransaction(deviceID) 818 continue 819 } 820 logrus.Debugf("devmapper: Error creating device: %s", err) 821 devices.markDeviceIDFree(deviceID) 822 return nil, err 823 } 824 break 825 } 826 827 logrus.Debugf("devmapper: Registering device (id %v) with FS size %v", deviceID, devices.baseFsSize) 828 info, err := devices.registerDevice(deviceID, hash, devices.baseFsSize, devices.OpenTransactionID) 829 if err != nil { 830 _ = devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 831 devices.markDeviceIDFree(deviceID) 832 return nil, err 833 } 834 835 if err := devices.closeTransaction(); err != nil { 836 devices.unregisterDevice(deviceID, hash) 837 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 838 devices.markDeviceIDFree(deviceID) 839 return nil, err 840 } 841 return info, nil 842 } 843 844 func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo, size uint64) error { 845 if err := devices.poolHasFreeSpace(); err != nil { 846 return err 847 } 848 849 deviceID, err := devices.getNextFreeDeviceID() 850 if err != nil { 851 return err 852 } 853 854 if err := devices.openTransaction(hash, deviceID); err != nil { 855 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 856 devices.markDeviceIDFree(deviceID) 857 return err 858 } 859 860 for { 861 if err := devicemapper.CreateSnapDevice(devices.getPoolDevName(), deviceID, baseInfo.Name(), baseInfo.DeviceID); err != nil { 862 if devicemapper.DeviceIDExists(err) { 863 // Device ID already exists. This should not 864 // happen. Now we have a mechanism to find 865 // a free device ID. So something is not right. 866 // Give a warning and continue. 
867 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 868 deviceID, err = devices.getNextFreeDeviceID() 869 if err != nil { 870 return err 871 } 872 // Save new device id into transaction 873 devices.refreshTransaction(deviceID) 874 continue 875 } 876 logrus.Debugf("devmapper: Error creating snap device: %s", err) 877 devices.markDeviceIDFree(deviceID) 878 return err 879 } 880 break 881 } 882 883 if _, err := devices.registerDevice(deviceID, hash, size, devices.OpenTransactionID); err != nil { 884 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 885 devices.markDeviceIDFree(deviceID) 886 logrus.Debugf("devmapper: Error registering device: %s", err) 887 return err 888 } 889 890 if err := devices.closeTransaction(); err != nil { 891 devices.unregisterDevice(deviceID, hash) 892 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 893 devices.markDeviceIDFree(deviceID) 894 return err 895 } 896 return nil 897 } 898 899 func (devices *DeviceSet) loadMetadata(hash string) *devInfo { 900 info := &devInfo{Hash: hash, devices: devices} 901 902 jsonData, err := ioutil.ReadFile(devices.metadataFile(info)) 903 if err != nil { 904 return nil 905 } 906 907 if err := json.Unmarshal(jsonData, &info); err != nil { 908 return nil 909 } 910 911 if info.DeviceID > maxDeviceID { 912 logrus.Errorf("devmapper: Ignoring Invalid DeviceId=%d", info.DeviceID) 913 return nil 914 } 915 916 return info 917 } 918 919 func getDeviceUUID(device string) (string, error) { 920 out, err := exec.Command("blkid", "-s", "UUID", "-o", "value", device).Output() 921 if err != nil { 922 return "", fmt.Errorf("devmapper: Failed to find uuid for device %s:%v", device, err) 923 } 924 925 uuid := strings.TrimSuffix(string(out), "\n") 926 uuid = strings.TrimSpace(uuid) 927 logrus.Debugf("devmapper: UUID for device: %s is:%s", device, uuid) 928 return uuid, nil 929 } 930 931 func (devices *DeviceSet) getBaseDeviceSize() uint64 { 932 info, _ := 
devices.lookupDevice("") 933 if info == nil { 934 return 0 935 } 936 return info.Size 937 } 938 939 func (devices *DeviceSet) getBaseDeviceFS() string { 940 return devices.BaseDeviceFilesystem 941 } 942 943 func (devices *DeviceSet) verifyBaseDeviceUUIDFS(baseInfo *devInfo) error { 944 devices.Lock() 945 defer devices.Unlock() 946 947 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 948 return err 949 } 950 defer devices.deactivateDevice(baseInfo) 951 952 uuid, err := getDeviceUUID(baseInfo.DevName()) 953 if err != nil { 954 return err 955 } 956 957 if devices.BaseDeviceUUID != uuid { 958 return fmt.Errorf("devmapper: Current Base Device UUID:%s does not match with stored UUID:%s. Possibly using a different thin pool than last invocation", uuid, devices.BaseDeviceUUID) 959 } 960 961 if devices.BaseDeviceFilesystem == "" { 962 fsType, err := ProbeFsType(baseInfo.DevName()) 963 if err != nil { 964 return err 965 } 966 if err := devices.saveBaseDeviceFilesystem(fsType); err != nil { 967 return err 968 } 969 } 970 971 // If user specified a filesystem using dm.fs option and current 972 // file system of base image is not same, warn user that dm.fs 973 // will be ignored. 974 if devices.BaseDeviceFilesystem != devices.filesystem { 975 logrus.Warnf("devmapper: Base device already exists and has filesystem %s on it. 
User specified filesystem %s will be ignored.", devices.BaseDeviceFilesystem, devices.filesystem) 976 devices.filesystem = devices.BaseDeviceFilesystem 977 } 978 return nil 979 } 980 981 func (devices *DeviceSet) saveBaseDeviceFilesystem(fs string) error { 982 devices.BaseDeviceFilesystem = fs 983 return devices.saveDeviceSetMetaData() 984 } 985 986 func (devices *DeviceSet) saveBaseDeviceUUID(baseInfo *devInfo) error { 987 devices.Lock() 988 defer devices.Unlock() 989 990 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 991 return err 992 } 993 defer devices.deactivateDevice(baseInfo) 994 995 uuid, err := getDeviceUUID(baseInfo.DevName()) 996 if err != nil { 997 return err 998 } 999 1000 devices.BaseDeviceUUID = uuid 1001 return devices.saveDeviceSetMetaData() 1002 } 1003 1004 func (devices *DeviceSet) createBaseImage() error { 1005 logrus.Debug("devmapper: Initializing base device-mapper thin volume") 1006 1007 // Create initial device 1008 info, err := devices.createRegisterDevice("") 1009 if err != nil { 1010 return err 1011 } 1012 1013 logrus.Debug("devmapper: Creating filesystem on base device-mapper thin volume") 1014 1015 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 1016 return err 1017 } 1018 1019 if err := devices.createFilesystem(info); err != nil { 1020 return err 1021 } 1022 1023 info.Initialized = true 1024 if err := devices.saveMetadata(info); err != nil { 1025 info.Initialized = false 1026 return err 1027 } 1028 1029 if err := devices.saveBaseDeviceUUID(info); err != nil { 1030 return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err) 1031 } 1032 1033 return nil 1034 } 1035 1036 // Returns if thin pool device exists or not. If device exists, also makes 1037 // sure it is a thin pool device and not some other type of device. 
1038 func (devices *DeviceSet) thinPoolExists(thinPoolDevice string) (bool, error) { 1039 logrus.Debugf("devmapper: Checking for existence of the pool %s", thinPoolDevice) 1040 1041 info, err := devicemapper.GetInfo(thinPoolDevice) 1042 if err != nil { 1043 return false, fmt.Errorf("devmapper: GetInfo() on device %s failed: %v", thinPoolDevice, err) 1044 } 1045 1046 // Device does not exist. 1047 if info.Exists == 0 { 1048 return false, nil 1049 } 1050 1051 _, _, deviceType, _, err := devicemapper.GetStatus(thinPoolDevice) 1052 if err != nil { 1053 return false, fmt.Errorf("devmapper: GetStatus() on device %s failed: %v", thinPoolDevice, err) 1054 } 1055 1056 if deviceType != "thin-pool" { 1057 return false, fmt.Errorf("devmapper: Device %s is not a thin pool", thinPoolDevice) 1058 } 1059 1060 return true, nil 1061 } 1062 1063 func (devices *DeviceSet) checkThinPool() error { 1064 _, transactionID, dataUsed, _, _, _, err := devices.poolStatus() 1065 if err != nil { 1066 return err 1067 } 1068 if dataUsed != 0 { 1069 return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) that already has used data blocks", 1070 devices.thinPoolDevice) 1071 } 1072 if transactionID != 0 { 1073 return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) with non-zero transaction ID", 1074 devices.thinPoolDevice) 1075 } 1076 return nil 1077 } 1078 1079 // Base image is initialized properly. Either save UUID for first time (for 1080 // upgrade case or verify UUID. 1081 func (devices *DeviceSet) setupVerifyBaseImageUUIDFS(baseInfo *devInfo) error { 1082 // If BaseDeviceUUID is nil (upgrade case), save it and return success. 
1083 if devices.BaseDeviceUUID == "" { 1084 if err := devices.saveBaseDeviceUUID(baseInfo); err != nil { 1085 return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err) 1086 } 1087 return nil 1088 } 1089 1090 if err := devices.verifyBaseDeviceUUIDFS(baseInfo); err != nil { 1091 return fmt.Errorf("devmapper: Base Device UUID and Filesystem verification failed: %v", err) 1092 } 1093 1094 return nil 1095 } 1096 1097 func (devices *DeviceSet) checkGrowBaseDeviceFS(info *devInfo) error { 1098 1099 if !userBaseSize { 1100 return nil 1101 } 1102 1103 if devices.baseFsSize < devices.getBaseDeviceSize() { 1104 return fmt.Errorf("devmapper: Base device size cannot be smaller than %s", units.HumanSize(float64(devices.getBaseDeviceSize()))) 1105 } 1106 1107 if devices.baseFsSize == devices.getBaseDeviceSize() { 1108 return nil 1109 } 1110 1111 info.lock.Lock() 1112 defer info.lock.Unlock() 1113 1114 devices.Lock() 1115 defer devices.Unlock() 1116 1117 info.Size = devices.baseFsSize 1118 1119 if err := devices.saveMetadata(info); err != nil { 1120 // Try to remove unused device 1121 delete(devices.Devices, info.Hash) 1122 return err 1123 } 1124 1125 return devices.growFS(info) 1126 } 1127 1128 func (devices *DeviceSet) growFS(info *devInfo) error { 1129 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 1130 return fmt.Errorf("Error activating devmapper device: %s", err) 1131 } 1132 1133 defer devices.deactivateDevice(info) 1134 1135 fsMountPoint := "/run/docker/mnt" 1136 if _, err := os.Stat(fsMountPoint); os.IsNotExist(err) { 1137 if err := os.MkdirAll(fsMountPoint, 0700); err != nil { 1138 return err 1139 } 1140 defer os.RemoveAll(fsMountPoint) 1141 } 1142 1143 options := "" 1144 if devices.BaseDeviceFilesystem == "xfs" { 1145 // XFS needs nouuid or it can't mount filesystems with the same fs 1146 options = joinMountOptions(options, "nouuid") 1147 } 1148 options = joinMountOptions(options, devices.mountOptions) 1149 1150 if err := 
mount.Mount(info.DevName(), fsMountPoint, devices.BaseDeviceFilesystem, options); err != nil { 1151 return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), fsMountPoint, err) 1152 } 1153 1154 defer syscall.Unmount(fsMountPoint, syscall.MNT_DETACH) 1155 1156 switch devices.BaseDeviceFilesystem { 1157 case "ext4": 1158 if out, err := exec.Command("resize2fs", info.DevName()).CombinedOutput(); err != nil { 1159 return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out)) 1160 } 1161 case "xfs": 1162 if out, err := exec.Command("xfs_growfs", info.DevName()).CombinedOutput(); err != nil { 1163 return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out)) 1164 } 1165 default: 1166 return fmt.Errorf("Unsupported filesystem type %s", devices.BaseDeviceFilesystem) 1167 } 1168 return nil 1169 } 1170 1171 func (devices *DeviceSet) setupBaseImage() error { 1172 oldInfo, _ := devices.lookupDeviceWithLock("") 1173 1174 // base image already exists. If it is initialized properly, do UUID 1175 // verification and return. Otherwise remove image and set it up 1176 // fresh. 1177 1178 if oldInfo != nil { 1179 if oldInfo.Initialized && !oldInfo.Deleted { 1180 if err := devices.setupVerifyBaseImageUUIDFS(oldInfo); err != nil { 1181 return err 1182 } 1183 1184 if err := devices.checkGrowBaseDeviceFS(oldInfo); err != nil { 1185 return err 1186 } 1187 1188 return nil 1189 } 1190 1191 logrus.Debug("devmapper: Removing uninitialized base image") 1192 // If previous base device is in deferred delete state, 1193 // that needs to be cleaned up first. So don't try 1194 // deferred deletion. 1195 if err := devices.DeleteDevice("", true); err != nil { 1196 return err 1197 } 1198 } 1199 1200 // If we are setting up base image for the first time, make sure 1201 // thin pool is empty. 
1202 if devices.thinPoolDevice != "" && oldInfo == nil { 1203 if err := devices.checkThinPool(); err != nil { 1204 return err 1205 } 1206 } 1207 1208 // Create new base image device 1209 if err := devices.createBaseImage(); err != nil { 1210 return err 1211 } 1212 1213 return nil 1214 } 1215 1216 func setCloseOnExec(name string) { 1217 if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil { 1218 for _, i := range fileInfos { 1219 link, _ := os.Readlink(filepath.Join("/proc/self/fd", i.Name())) 1220 if link == name { 1221 fd, err := strconv.Atoi(i.Name()) 1222 if err == nil { 1223 syscall.CloseOnExec(fd) 1224 } 1225 } 1226 } 1227 } 1228 } 1229 1230 // DMLog implements logging using DevMapperLogger interface. 1231 func (devices *DeviceSet) DMLog(level int, file string, line int, dmError int, message string) { 1232 // By default libdm sends us all the messages including debug ones. 1233 // We need to filter out messages here and figure out which one 1234 // should be printed. 1235 if level > logLevel { 1236 return 1237 } 1238 1239 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 1240 if level <= devicemapper.LogLevelErr { 1241 logrus.Errorf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1242 } else if level <= devicemapper.LogLevelInfo { 1243 logrus.Infof("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1244 } else { 1245 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 1246 logrus.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1247 } 1248 } 1249 1250 func major(device uint64) uint64 { 1251 return (device >> 8) & 0xfff 1252 } 1253 1254 func minor(device uint64) uint64 { 1255 return (device & 0xff) | ((device >> 12) & 0xfff00) 1256 } 1257 1258 // ResizePool increases the size of the pool. 
1259 func (devices *DeviceSet) ResizePool(size int64) error { 1260 dirname := devices.loopbackDir() 1261 datafilename := path.Join(dirname, "data") 1262 if len(devices.dataDevice) > 0 { 1263 datafilename = devices.dataDevice 1264 } 1265 metadatafilename := path.Join(dirname, "metadata") 1266 if len(devices.metadataDevice) > 0 { 1267 metadatafilename = devices.metadataDevice 1268 } 1269 1270 datafile, err := os.OpenFile(datafilename, os.O_RDWR, 0) 1271 if datafile == nil { 1272 return err 1273 } 1274 defer datafile.Close() 1275 1276 fi, err := datafile.Stat() 1277 if fi == nil { 1278 return err 1279 } 1280 1281 if fi.Size() > size { 1282 return fmt.Errorf("devmapper: Can't shrink file") 1283 } 1284 1285 dataloopback := loopback.FindLoopDeviceFor(datafile) 1286 if dataloopback == nil { 1287 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", datafilename) 1288 } 1289 defer dataloopback.Close() 1290 1291 metadatafile, err := os.OpenFile(metadatafilename, os.O_RDWR, 0) 1292 if metadatafile == nil { 1293 return err 1294 } 1295 defer metadatafile.Close() 1296 1297 metadataloopback := loopback.FindLoopDeviceFor(metadatafile) 1298 if metadataloopback == nil { 1299 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", metadatafilename) 1300 } 1301 defer metadataloopback.Close() 1302 1303 // Grow loopback file 1304 if err := datafile.Truncate(size); err != nil { 1305 return fmt.Errorf("devmapper: Unable to grow loopback file: %s", err) 1306 } 1307 1308 // Reload size for loopback device 1309 if err := loopback.SetCapacity(dataloopback); err != nil { 1310 return fmt.Errorf("Unable to update loopback capacity: %s", err) 1311 } 1312 1313 // Suspend the pool 1314 if err := devicemapper.SuspendDevice(devices.getPoolName()); err != nil { 1315 return fmt.Errorf("devmapper: Unable to suspend pool: %s", err) 1316 } 1317 1318 // Reload with the new block sizes 1319 if err := devicemapper.ReloadPool(devices.getPoolName(), dataloopback, 
metadataloopback, devices.thinpBlockSize); err != nil { 1320 return fmt.Errorf("devmapper: Unable to reload pool: %s", err) 1321 } 1322 1323 // Resume the pool 1324 if err := devicemapper.ResumeDevice(devices.getPoolName()); err != nil { 1325 return fmt.Errorf("devmapper: Unable to resume pool: %s", err) 1326 } 1327 1328 return nil 1329 } 1330 1331 func (devices *DeviceSet) loadTransactionMetaData() error { 1332 jsonData, err := ioutil.ReadFile(devices.transactionMetaFile()) 1333 if err != nil { 1334 // There is no active transaction. This will be the case 1335 // during upgrade. 1336 if os.IsNotExist(err) { 1337 devices.OpenTransactionID = devices.TransactionID 1338 return nil 1339 } 1340 return err 1341 } 1342 1343 json.Unmarshal(jsonData, &devices.transaction) 1344 return nil 1345 } 1346 1347 func (devices *DeviceSet) saveTransactionMetaData() error { 1348 jsonData, err := json.Marshal(&devices.transaction) 1349 if err != nil { 1350 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1351 } 1352 1353 return devices.writeMetaFile(jsonData, devices.transactionMetaFile()) 1354 } 1355 1356 func (devices *DeviceSet) removeTransactionMetaData() error { 1357 if err := os.RemoveAll(devices.transactionMetaFile()); err != nil { 1358 return err 1359 } 1360 return nil 1361 } 1362 1363 func (devices *DeviceSet) rollbackTransaction() error { 1364 logrus.Debugf("devmapper: Rolling back open transaction: TransactionID=%d hash=%s device_id=%d", devices.OpenTransactionID, devices.DeviceIDHash, devices.DeviceID) 1365 1366 // A device id might have already been deleted before transaction 1367 // closed. In that case this call will fail. Just leave a message 1368 // in case of failure. 
1369 if err := devicemapper.DeleteDevice(devices.getPoolDevName(), devices.DeviceID); err != nil { 1370 logrus.Errorf("devmapper: Unable to delete device: %s", err) 1371 } 1372 1373 dinfo := &devInfo{Hash: devices.DeviceIDHash} 1374 if err := devices.removeMetadata(dinfo); err != nil { 1375 logrus.Errorf("devmapper: Unable to remove metadata: %s", err) 1376 } else { 1377 devices.markDeviceIDFree(devices.DeviceID) 1378 } 1379 1380 if err := devices.removeTransactionMetaData(); err != nil { 1381 logrus.Errorf("devmapper: Unable to remove transaction meta file %s: %s", devices.transactionMetaFile(), err) 1382 } 1383 1384 return nil 1385 } 1386 1387 func (devices *DeviceSet) processPendingTransaction() error { 1388 if err := devices.loadTransactionMetaData(); err != nil { 1389 return err 1390 } 1391 1392 // If there was open transaction but pool transaction ID is same 1393 // as open transaction ID, nothing to roll back. 1394 if devices.TransactionID == devices.OpenTransactionID { 1395 return nil 1396 } 1397 1398 // If open transaction ID is less than pool transaction ID, something 1399 // is wrong. Bail out. 1400 if devices.OpenTransactionID < devices.TransactionID { 1401 logrus.Errorf("devmapper: Open Transaction id %d is less than pool transaction id %d", devices.OpenTransactionID, devices.TransactionID) 1402 return nil 1403 } 1404 1405 // Pool transaction ID is not same as open transaction. There is 1406 // a transaction which was not completed. 1407 if err := devices.rollbackTransaction(); err != nil { 1408 return fmt.Errorf("devmapper: Rolling back open transaction failed: %s", err) 1409 } 1410 1411 devices.OpenTransactionID = devices.TransactionID 1412 return nil 1413 } 1414 1415 func (devices *DeviceSet) loadDeviceSetMetaData() error { 1416 jsonData, err := ioutil.ReadFile(devices.deviceSetMetaFile()) 1417 if err != nil { 1418 // For backward compatibility return success if file does 1419 // not exist. 
1420 if os.IsNotExist(err) { 1421 return nil 1422 } 1423 return err 1424 } 1425 1426 return json.Unmarshal(jsonData, devices) 1427 } 1428 1429 func (devices *DeviceSet) saveDeviceSetMetaData() error { 1430 jsonData, err := json.Marshal(devices) 1431 if err != nil { 1432 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1433 } 1434 1435 return devices.writeMetaFile(jsonData, devices.deviceSetMetaFile()) 1436 } 1437 1438 func (devices *DeviceSet) openTransaction(hash string, DeviceID int) error { 1439 devices.allocateTransactionID() 1440 devices.DeviceIDHash = hash 1441 devices.DeviceID = DeviceID 1442 if err := devices.saveTransactionMetaData(); err != nil { 1443 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1444 } 1445 return nil 1446 } 1447 1448 func (devices *DeviceSet) refreshTransaction(DeviceID int) error { 1449 devices.DeviceID = DeviceID 1450 if err := devices.saveTransactionMetaData(); err != nil { 1451 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1452 } 1453 return nil 1454 } 1455 1456 func (devices *DeviceSet) closeTransaction() error { 1457 if err := devices.updatePoolTransactionID(); err != nil { 1458 logrus.Debug("devmapper: Failed to close Transaction") 1459 return err 1460 } 1461 return nil 1462 } 1463 1464 func determineDriverCapabilities(version string) error { 1465 /* 1466 * Driver version 4.27.0 and greater support deferred activation 1467 * feature. 
1468 */ 1469 1470 logrus.Debugf("devicemapper: driver version is %s", version) 1471 1472 versionSplit := strings.Split(version, ".") 1473 major, err := strconv.Atoi(versionSplit[0]) 1474 if err != nil { 1475 return graphdriver.ErrNotSupported 1476 } 1477 1478 if major > 4 { 1479 driverDeferredRemovalSupport = true 1480 return nil 1481 } 1482 1483 if major < 4 { 1484 return nil 1485 } 1486 1487 minor, err := strconv.Atoi(versionSplit[1]) 1488 if err != nil { 1489 return graphdriver.ErrNotSupported 1490 } 1491 1492 /* 1493 * If major is 4 and minor is 27, then there is no need to 1494 * check for patch level as it can not be less than 0. 1495 */ 1496 if minor >= 27 { 1497 driverDeferredRemovalSupport = true 1498 return nil 1499 } 1500 1501 return nil 1502 } 1503 1504 // Determine the major and minor number of loopback device 1505 func getDeviceMajorMinor(file *os.File) (uint64, uint64, error) { 1506 stat, err := file.Stat() 1507 if err != nil { 1508 return 0, 0, err 1509 } 1510 1511 dev := stat.Sys().(*syscall.Stat_t).Rdev 1512 majorNum := major(dev) 1513 minorNum := minor(dev) 1514 1515 logrus.Debugf("devmapper: Major:Minor for device: %s is:%v:%v", file.Name(), majorNum, minorNum) 1516 return majorNum, minorNum, nil 1517 } 1518 1519 // Given a file which is backing file of a loop back device, find the 1520 // loopback device name and its major/minor number. 
1521 func getLoopFileDeviceMajMin(filename string) (string, uint64, uint64, error) { 1522 file, err := os.Open(filename) 1523 if err != nil { 1524 logrus.Debugf("devmapper: Failed to open file %s", filename) 1525 return "", 0, 0, err 1526 } 1527 1528 defer file.Close() 1529 loopbackDevice := loopback.FindLoopDeviceFor(file) 1530 if loopbackDevice == nil { 1531 return "", 0, 0, fmt.Errorf("devmapper: Unable to find loopback mount for: %s", filename) 1532 } 1533 defer loopbackDevice.Close() 1534 1535 Major, Minor, err := getDeviceMajorMinor(loopbackDevice) 1536 if err != nil { 1537 return "", 0, 0, err 1538 } 1539 return loopbackDevice.Name(), Major, Minor, nil 1540 } 1541 1542 // Get the major/minor numbers of thin pool data and metadata devices 1543 func (devices *DeviceSet) getThinPoolDataMetaMajMin() (uint64, uint64, uint64, uint64, error) { 1544 var params, poolDataMajMin, poolMetadataMajMin string 1545 1546 _, _, _, params, err := devicemapper.GetTable(devices.getPoolName()) 1547 if err != nil { 1548 return 0, 0, 0, 0, err 1549 } 1550 1551 if _, err = fmt.Sscanf(params, "%s %s", &poolMetadataMajMin, &poolDataMajMin); err != nil { 1552 return 0, 0, 0, 0, err 1553 } 1554 1555 logrus.Debugf("devmapper: poolDataMajMin=%s poolMetaMajMin=%s\n", poolDataMajMin, poolMetadataMajMin) 1556 1557 poolDataMajMinorSplit := strings.Split(poolDataMajMin, ":") 1558 poolDataMajor, err := strconv.ParseUint(poolDataMajMinorSplit[0], 10, 32) 1559 if err != nil { 1560 return 0, 0, 0, 0, err 1561 } 1562 1563 poolDataMinor, err := strconv.ParseUint(poolDataMajMinorSplit[1], 10, 32) 1564 if err != nil { 1565 return 0, 0, 0, 0, err 1566 } 1567 1568 poolMetadataMajMinorSplit := strings.Split(poolMetadataMajMin, ":") 1569 poolMetadataMajor, err := strconv.ParseUint(poolMetadataMajMinorSplit[0], 10, 32) 1570 if err != nil { 1571 return 0, 0, 0, 0, err 1572 } 1573 1574 poolMetadataMinor, err := strconv.ParseUint(poolMetadataMajMinorSplit[1], 10, 32) 1575 if err != nil { 1576 return 0, 0, 0, 
0, err 1577 } 1578 1579 return poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, nil 1580 } 1581 1582 func (devices *DeviceSet) loadThinPoolLoopBackInfo() error { 1583 poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, err := devices.getThinPoolDataMetaMajMin() 1584 if err != nil { 1585 return err 1586 } 1587 1588 dirname := devices.loopbackDir() 1589 1590 // data device has not been passed in. So there should be a data file 1591 // which is being mounted as loop device. 1592 if devices.dataDevice == "" { 1593 datafilename := path.Join(dirname, "data") 1594 dataLoopDevice, dataMajor, dataMinor, err := getLoopFileDeviceMajMin(datafilename) 1595 if err != nil { 1596 return err 1597 } 1598 1599 // Compare the two 1600 if poolDataMajor == dataMajor && poolDataMinor == dataMinor { 1601 devices.dataDevice = dataLoopDevice 1602 devices.dataLoopFile = datafilename 1603 } 1604 1605 } 1606 1607 // metadata device has not been passed in. So there should be a 1608 // metadata file which is being mounted as loop device. 1609 if devices.metadataDevice == "" { 1610 metadatafilename := path.Join(dirname, "metadata") 1611 metadataLoopDevice, metadataMajor, metadataMinor, err := getLoopFileDeviceMajMin(metadatafilename) 1612 if err != nil { 1613 return err 1614 } 1615 if poolMetadataMajor == metadataMajor && poolMetadataMinor == metadataMinor { 1616 devices.metadataDevice = metadataLoopDevice 1617 devices.metadataLoopFile = metadatafilename 1618 } 1619 } 1620 1621 return nil 1622 } 1623 1624 func (devices *DeviceSet) enableDeferredRemovalDeletion() error { 1625 1626 // If user asked for deferred removal then check both libdm library 1627 // and kernel driver support deferred removal otherwise error out. 
1628 if enableDeferredRemoval { 1629 if !driverDeferredRemovalSupport { 1630 return fmt.Errorf("devmapper: Deferred removal can not be enabled as kernel does not support it") 1631 } 1632 if !devicemapper.LibraryDeferredRemovalSupport { 1633 return fmt.Errorf("devmapper: Deferred removal can not be enabled as libdm does not support it") 1634 } 1635 logrus.Debug("devmapper: Deferred removal support enabled.") 1636 devices.deferredRemove = true 1637 } 1638 1639 if enableDeferredDeletion { 1640 if !devices.deferredRemove { 1641 return fmt.Errorf("devmapper: Deferred deletion can not be enabled as deferred removal is not enabled. Enable deferred removal using --storage-opt dm.use_deferred_removal=true parameter") 1642 } 1643 logrus.Debug("devmapper: Deferred deletion support enabled.") 1644 devices.deferredDelete = true 1645 } 1646 return nil 1647 } 1648 1649 func (devices *DeviceSet) initDevmapper(doInit bool) error { 1650 // give ourselves to libdm as a log handler 1651 devicemapper.LogInit(devices) 1652 1653 version, err := devicemapper.GetDriverVersion() 1654 if err != nil { 1655 // Can't even get driver version, assume not supported 1656 return graphdriver.ErrNotSupported 1657 } 1658 1659 if err := determineDriverCapabilities(version); err != nil { 1660 return graphdriver.ErrNotSupported 1661 } 1662 1663 if err := devices.enableDeferredRemovalDeletion(); err != nil { 1664 return err 1665 } 1666 1667 // https://github.com/docker/docker/issues/4036 1668 if supported := devicemapper.UdevSetSyncSupport(true); !supported { 1669 if dockerversion.IAmStatic == "true" { 1670 logrus.Errorf("devmapper: Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a dynamic binary to use devicemapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/daemon/#daemon-storage-driver-option") 1671 } else { 1672 logrus.Errorf("devmapper: Udev sync is not supported. 
This will lead to data loss and unexpected behavior. Install a more recent version of libdevmapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/daemon/#daemon-storage-driver-option") 1673 } 1674 1675 if !devices.overrideUdevSyncCheck { 1676 return graphdriver.ErrNotSupported 1677 } 1678 } 1679 1680 //create the root dir of the devmapper driver ownership to match this 1681 //daemon's remapped root uid/gid so containers can start properly 1682 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 1683 if err != nil { 1684 return err 1685 } 1686 if err := idtools.MkdirAs(devices.root, 0700, uid, gid); err != nil && !os.IsExist(err) { 1687 return err 1688 } 1689 if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil && !os.IsExist(err) { 1690 return err 1691 } 1692 1693 // Set the device prefix from the device id and inode of the docker root dir 1694 1695 st, err := os.Stat(devices.root) 1696 if err != nil { 1697 return fmt.Errorf("devmapper: Error looking up dir %s: %s", devices.root, err) 1698 } 1699 sysSt := st.Sys().(*syscall.Stat_t) 1700 // "reg-" stands for "regular file". 1701 // In the future we might use "dev-" for "device file", etc. 1702 // docker-maj,min[-inode] stands for: 1703 // - Managed by docker 1704 // - The target of this device is at major <maj> and minor <min> 1705 // - If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself. 
1706 devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino) 1707 logrus.Debugf("devmapper: Generated prefix: %s", devices.devicePrefix) 1708 1709 // Check for the existence of the thin-pool device 1710 poolExists, err := devices.thinPoolExists(devices.getPoolName()) 1711 if err != nil { 1712 return err 1713 } 1714 1715 // It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files 1716 // that are not Close-on-exec, 1717 // so we add this badhack to make sure it closes itself 1718 setCloseOnExec("/dev/mapper/control") 1719 1720 // Make sure the sparse images exist in <root>/devicemapper/data and 1721 // <root>/devicemapper/metadata 1722 1723 createdLoopback := false 1724 1725 // If the pool doesn't exist, create it 1726 if !poolExists && devices.thinPoolDevice == "" { 1727 logrus.Debug("devmapper: Pool doesn't exist. Creating it.") 1728 1729 var ( 1730 dataFile *os.File 1731 metadataFile *os.File 1732 ) 1733 1734 if devices.dataDevice == "" { 1735 // Make sure the sparse images exist in <root>/devicemapper/data 1736 1737 hasData := devices.hasImage("data") 1738 1739 if !doInit && !hasData { 1740 return errors.New("Loopback data file not found") 1741 } 1742 1743 if !hasData { 1744 createdLoopback = true 1745 } 1746 1747 data, err := devices.ensureImage("data", devices.dataLoopbackSize) 1748 if err != nil { 1749 logrus.Debugf("devmapper: Error device ensureImage (data): %s", err) 1750 return err 1751 } 1752 1753 dataFile, err = loopback.AttachLoopDevice(data) 1754 if err != nil { 1755 return err 1756 } 1757 devices.dataLoopFile = data 1758 devices.dataDevice = dataFile.Name() 1759 } else { 1760 dataFile, err = os.OpenFile(devices.dataDevice, os.O_RDWR, 0600) 1761 if err != nil { 1762 return err 1763 } 1764 } 1765 defer dataFile.Close() 1766 1767 if devices.metadataDevice == "" { 1768 // Make sure the sparse images exist in <root>/devicemapper/metadata 1769 1770 hasMetadata := 
devices.hasImage("metadata") 1771 1772 if !doInit && !hasMetadata { 1773 return errors.New("Loopback metadata file not found") 1774 } 1775 1776 if !hasMetadata { 1777 createdLoopback = true 1778 } 1779 1780 metadata, err := devices.ensureImage("metadata", devices.metaDataLoopbackSize) 1781 if err != nil { 1782 logrus.Debugf("devmapper: Error device ensureImage (metadata): %s", err) 1783 return err 1784 } 1785 1786 metadataFile, err = loopback.AttachLoopDevice(metadata) 1787 if err != nil { 1788 return err 1789 } 1790 devices.metadataLoopFile = metadata 1791 devices.metadataDevice = metadataFile.Name() 1792 } else { 1793 metadataFile, err = os.OpenFile(devices.metadataDevice, os.O_RDWR, 0600) 1794 if err != nil { 1795 return err 1796 } 1797 } 1798 defer metadataFile.Close() 1799 1800 if err := devicemapper.CreatePool(devices.getPoolName(), dataFile, metadataFile, devices.thinpBlockSize); err != nil { 1801 return err 1802 } 1803 } 1804 1805 // Pool already exists and caller did not pass us a pool. That means 1806 // we probably created pool earlier and could not remove it as some 1807 // containers were still using it. Detect some of the properties of 1808 // pool, like is it using loop devices. 1809 if poolExists && devices.thinPoolDevice == "" { 1810 if err := devices.loadThinPoolLoopBackInfo(); err != nil { 1811 logrus.Debugf("devmapper: Failed to load thin pool loopback device information:%v", err) 1812 return err 1813 } 1814 } 1815 1816 // If we didn't just create the data or metadata image, we need to 1817 // load the transaction id and migrate old metadata 1818 if !createdLoopback { 1819 if err := devices.initMetaData(); err != nil { 1820 return err 1821 } 1822 } 1823 1824 if devices.thinPoolDevice == "" { 1825 if devices.metadataLoopFile != "" || devices.dataLoopFile != "" { 1826 logrus.Warn("devmapper: Usage of loopback devices is strongly discouraged for production use. 
Please use `--storage-opt dm.thinpooldev` or use `man docker` to refer to dm.thinpooldev section.") 1827 } 1828 } 1829 1830 // Right now this loads only NextDeviceID. If there is more metadata 1831 // down the line, we might have to move it earlier. 1832 if err := devices.loadDeviceSetMetaData(); err != nil { 1833 return err 1834 } 1835 1836 // Setup the base image 1837 if doInit { 1838 if err := devices.setupBaseImage(); err != nil { 1839 logrus.Debugf("devmapper: Error device setupBaseImage: %s", err) 1840 return err 1841 } 1842 } 1843 1844 return nil 1845 } 1846 1847 // AddDevice adds a device and registers in the hash. 1848 func (devices *DeviceSet) AddDevice(hash, baseHash string, storageOpt map[string]string) error { 1849 logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s)", hash, baseHash) 1850 defer logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s) END", hash, baseHash) 1851 1852 // If a deleted device exists, return error. 1853 baseInfo, err := devices.lookupDeviceWithLock(baseHash) 1854 if err != nil { 1855 return err 1856 } 1857 1858 if baseInfo.Deleted { 1859 return fmt.Errorf("devmapper: Base device %v has been marked for deferred deletion", baseInfo.Hash) 1860 } 1861 1862 baseInfo.lock.Lock() 1863 defer baseInfo.lock.Unlock() 1864 1865 devices.Lock() 1866 defer devices.Unlock() 1867 1868 // Also include deleted devices in case hash of new device is 1869 // same as one of the deleted devices. 1870 if info, _ := devices.lookupDevice(hash); info != nil { 1871 return fmt.Errorf("devmapper: device %s already exists. 
Deleted=%v", hash, info.Deleted) 1872 } 1873 1874 size, err := devices.parseStorageOpt(storageOpt) 1875 if err != nil { 1876 return err 1877 } 1878 1879 if size == 0 { 1880 size = baseInfo.Size 1881 } 1882 1883 if size < baseInfo.Size { 1884 return fmt.Errorf("devmapper: Container size cannot be smaller than %s", units.HumanSize(float64(baseInfo.Size))) 1885 } 1886 1887 if err := devices.createRegisterSnapDevice(hash, baseInfo, size); err != nil { 1888 return err 1889 } 1890 1891 // Grow the container rootfs. 1892 if size > baseInfo.Size { 1893 info, err := devices.lookupDevice(hash) 1894 if err != nil { 1895 return err 1896 } 1897 1898 if err := devices.growFS(info); err != nil { 1899 return err 1900 } 1901 } 1902 1903 return nil 1904 } 1905 1906 func (devices *DeviceSet) parseStorageOpt(storageOpt map[string]string) (uint64, error) { 1907 1908 // Read size to change the block device size per container. 1909 for key, val := range storageOpt { 1910 key := strings.ToLower(key) 1911 switch key { 1912 case "size": 1913 size, err := units.RAMInBytes(val) 1914 if err != nil { 1915 return 0, err 1916 } 1917 return uint64(size), nil 1918 default: 1919 return 0, fmt.Errorf("Unknown option %s", key) 1920 } 1921 } 1922 1923 return 0, nil 1924 } 1925 1926 func (devices *DeviceSet) markForDeferredDeletion(info *devInfo) error { 1927 // If device is already in deleted state, there is nothing to be done. 1928 if info.Deleted { 1929 return nil 1930 } 1931 1932 logrus.Debugf("devmapper: Marking device %s for deferred deletion.", info.Hash) 1933 1934 info.Deleted = true 1935 1936 // save device metadata to reflect deleted state. 1937 if err := devices.saveMetadata(info); err != nil { 1938 info.Deleted = false 1939 return err 1940 } 1941 1942 devices.nrDeletedDevices++ 1943 return nil 1944 } 1945 1946 // Should be called with devices.Lock() held. 
1947 func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) error { 1948 if err := devices.openTransaction(info.Hash, info.DeviceID); err != nil { 1949 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceId = %d", "", info.DeviceID) 1950 return err 1951 } 1952 1953 defer devices.closeTransaction() 1954 1955 err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID) 1956 if err != nil { 1957 // If syncDelete is true, we want to return error. If deferred 1958 // deletion is not enabled, we return an error. If error is 1959 // something other then EBUSY, return an error. 1960 if syncDelete || !devices.deferredDelete || err != devicemapper.ErrBusy { 1961 logrus.Debugf("devmapper: Error deleting device: %s", err) 1962 return err 1963 } 1964 } 1965 1966 if err == nil { 1967 if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil { 1968 return err 1969 } 1970 // If device was already in deferred delete state that means 1971 // deletion was being tried again later. Reduce the deleted 1972 // device count. 1973 if info.Deleted { 1974 devices.nrDeletedDevices-- 1975 } 1976 devices.markDeviceIDFree(info.DeviceID) 1977 } else { 1978 if err := devices.markForDeferredDeletion(info); err != nil { 1979 return err 1980 } 1981 } 1982 1983 return nil 1984 } 1985 1986 // Issue discard only if device open count is zero. 1987 func (devices *DeviceSet) issueDiscard(info *devInfo) error { 1988 logrus.Debugf("devmapper: issueDiscard(device: %s). START", info.Hash) 1989 defer logrus.Debugf("devmapper: issueDiscard(device: %s). END", info.Hash) 1990 // This is a workaround for the kernel not discarding block so 1991 // on the thin pool when we remove a thinp device, so we do it 1992 // manually. 1993 // Even if device is deferred deleted, activate it and issue 1994 // discards. 
1995 if err := devices.activateDeviceIfNeeded(info, true); err != nil { 1996 return err 1997 } 1998 1999 devinfo, err := devicemapper.GetInfo(info.Name()) 2000 if err != nil { 2001 return err 2002 } 2003 2004 if devinfo.OpenCount != 0 { 2005 logrus.Debugf("devmapper: Device: %s is in use. OpenCount=%d. Not issuing discards.", info.Hash, devinfo.OpenCount) 2006 return nil 2007 } 2008 2009 if err := devicemapper.BlockDeviceDiscard(info.DevName()); err != nil { 2010 logrus.Debugf("devmapper: Error discarding block on device: %s (ignoring)", err) 2011 } 2012 return nil 2013 } 2014 2015 // Should be called with devices.Lock() held. 2016 func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error { 2017 if devices.doBlkDiscard { 2018 devices.issueDiscard(info) 2019 } 2020 2021 // Try to deactivate device in case it is active. 2022 if err := devices.deactivateDevice(info); err != nil { 2023 logrus.Debugf("devmapper: Error deactivating device: %s", err) 2024 return err 2025 } 2026 2027 if err := devices.deleteTransaction(info, syncDelete); err != nil { 2028 return err 2029 } 2030 2031 return nil 2032 } 2033 2034 // DeleteDevice will return success if device has been marked for deferred 2035 // removal. If one wants to override that and want DeleteDevice() to fail if 2036 // device was busy and could not be deleted, set syncDelete=true. 
2037 func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error { 2038 logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) START", hash, syncDelete) 2039 defer logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) END", hash, syncDelete) 2040 info, err := devices.lookupDeviceWithLock(hash) 2041 if err != nil { 2042 return err 2043 } 2044 2045 info.lock.Lock() 2046 defer info.lock.Unlock() 2047 2048 devices.Lock() 2049 defer devices.Unlock() 2050 2051 return devices.deleteDevice(info, syncDelete) 2052 } 2053 2054 func (devices *DeviceSet) deactivatePool() error { 2055 logrus.Debug("devmapper: deactivatePool()") 2056 defer logrus.Debug("devmapper: deactivatePool END") 2057 devname := devices.getPoolDevName() 2058 2059 devinfo, err := devicemapper.GetInfo(devname) 2060 if err != nil { 2061 return err 2062 } 2063 2064 if devinfo.Exists == 0 { 2065 return nil 2066 } 2067 if err := devicemapper.RemoveDevice(devname); err != nil { 2068 return err 2069 } 2070 2071 if d, err := devicemapper.GetDeps(devname); err == nil { 2072 logrus.Warnf("devmapper: device %s still has %d active dependents", devname, d.Count) 2073 } 2074 2075 return nil 2076 } 2077 2078 func (devices *DeviceSet) deactivateDevice(info *devInfo) error { 2079 logrus.Debugf("devmapper: deactivateDevice(%s)", info.Hash) 2080 defer logrus.Debugf("devmapper: deactivateDevice END(%s)", info.Hash) 2081 2082 devinfo, err := devicemapper.GetInfo(info.Name()) 2083 if err != nil { 2084 return err 2085 } 2086 2087 if devinfo.Exists == 0 { 2088 return nil 2089 } 2090 2091 if devices.deferredRemove { 2092 if err := devicemapper.RemoveDeviceDeferred(info.Name()); err != nil { 2093 return err 2094 } 2095 } else { 2096 if err := devices.removeDevice(info.Name()); err != nil { 2097 return err 2098 } 2099 } 2100 return nil 2101 } 2102 2103 // Issues the underlying dm remove operation. 
2104 func (devices *DeviceSet) removeDevice(devname string) error { 2105 var err error 2106 2107 logrus.Debugf("devmapper: removeDevice START(%s)", devname) 2108 defer logrus.Debugf("devmapper: removeDevice END(%s)", devname) 2109 2110 for i := 0; i < 200; i++ { 2111 err = devicemapper.RemoveDevice(devname) 2112 if err == nil { 2113 break 2114 } 2115 if err != devicemapper.ErrBusy { 2116 return err 2117 } 2118 2119 // If we see EBUSY it may be a transient error, 2120 // sleep a bit a retry a few times. 2121 devices.Unlock() 2122 time.Sleep(100 * time.Millisecond) 2123 devices.Lock() 2124 } 2125 2126 return err 2127 } 2128 2129 func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error { 2130 if !devices.deferredRemove { 2131 return nil 2132 } 2133 2134 logrus.Debugf("devmapper: cancelDeferredRemoval START(%s)", info.Name()) 2135 defer logrus.Debugf("devmapper: cancelDeferredRemoval END(%s)", info.Name()) 2136 2137 devinfo, err := devicemapper.GetInfoWithDeferred(info.Name()) 2138 2139 if devinfo != nil && devinfo.DeferredRemove == 0 { 2140 return nil 2141 } 2142 2143 // Cancel deferred remove 2144 for i := 0; i < 100; i++ { 2145 err = devicemapper.CancelDeferredRemove(info.Name()) 2146 if err == nil { 2147 break 2148 } 2149 2150 if err == devicemapper.ErrEnxio { 2151 // Device is probably already gone. Return success. 2152 return nil 2153 } 2154 2155 if err != devicemapper.ErrBusy { 2156 return err 2157 } 2158 2159 // If we see EBUSY it may be a transient error, 2160 // sleep a bit a retry a few times. 2161 devices.Unlock() 2162 time.Sleep(100 * time.Millisecond) 2163 devices.Lock() 2164 } 2165 return err 2166 } 2167 2168 // Shutdown shuts down the device by unmounting the root. 
2169 func (devices *DeviceSet) Shutdown(home string) error { 2170 logrus.Debugf("devmapper: [deviceset %s] Shutdown()", devices.devicePrefix) 2171 logrus.Debugf("devmapper: Shutting down DeviceSet: %s", devices.root) 2172 defer logrus.Debugf("devmapper: [deviceset %s] Shutdown() END", devices.devicePrefix) 2173 2174 // Stop deletion worker. This should start delivering new events to 2175 // ticker channel. That means no new instance of cleanupDeletedDevice() 2176 // will run after this call. If one instance is already running at 2177 // the time of the call, it must be holding devices.Lock() and 2178 // we will block on this lock till cleanup function exits. 2179 devices.deletionWorkerTicker.Stop() 2180 2181 devices.Lock() 2182 // Save DeviceSet Metadata first. Docker kills all threads if they 2183 // don't finish in certain time. It is possible that Shutdown() 2184 // routine does not finish in time as we loop trying to deactivate 2185 // some devices while these are busy. In that case shutdown() routine 2186 // will be killed and we will not get a chance to save deviceset 2187 // metadata. Hence save this early before trying to deactivate devices. 2188 devices.saveDeviceSetMetaData() 2189 2190 // ignore the error since it's just a best effort to not try to unmount something that's mounted 2191 mounts, _ := mount.GetMounts() 2192 mounted := make(map[string]bool, len(mounts)) 2193 for _, mnt := range mounts { 2194 mounted[mnt.Mountpoint] = true 2195 } 2196 2197 if err := filepath.Walk(path.Join(home, "mnt"), func(p string, info os.FileInfo, err error) error { 2198 if err != nil { 2199 return err 2200 } 2201 if !info.IsDir() { 2202 return nil 2203 } 2204 2205 if mounted[p] { 2206 // We use MNT_DETACH here in case it is still busy in some running 2207 // container. This means it'll go away from the global scope directly, 2208 // and the device will be released when that container dies. 
2209 if err := syscall.Unmount(p, syscall.MNT_DETACH); err != nil { 2210 logrus.Debugf("devmapper: Shutdown unmounting %s, error: %s", p, err) 2211 } 2212 } 2213 2214 if devInfo, err := devices.lookupDevice(path.Base(p)); err != nil { 2215 logrus.Debugf("devmapper: Shutdown lookup device %s, error: %s", path.Base(p), err) 2216 } else { 2217 if err := devices.deactivateDevice(devInfo); err != nil { 2218 logrus.Debugf("devmapper: Shutdown deactivate %s , error: %s", devInfo.Hash, err) 2219 } 2220 } 2221 2222 return nil 2223 }); err != nil && !os.IsNotExist(err) { 2224 devices.Unlock() 2225 return err 2226 } 2227 2228 devices.Unlock() 2229 2230 info, _ := devices.lookupDeviceWithLock("") 2231 if info != nil { 2232 info.lock.Lock() 2233 devices.Lock() 2234 if err := devices.deactivateDevice(info); err != nil { 2235 logrus.Debugf("devmapper: Shutdown deactivate base , error: %s", err) 2236 } 2237 devices.Unlock() 2238 info.lock.Unlock() 2239 } 2240 2241 devices.Lock() 2242 if devices.thinPoolDevice == "" { 2243 if err := devices.deactivatePool(); err != nil { 2244 logrus.Debugf("devmapper: Shutdown deactivate pool , error: %s", err) 2245 } 2246 } 2247 devices.Unlock() 2248 2249 return nil 2250 } 2251 2252 // MountDevice mounts the device if not already mounted. 
2253 func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error { 2254 info, err := devices.lookupDeviceWithLock(hash) 2255 if err != nil { 2256 return err 2257 } 2258 2259 if info.Deleted { 2260 return fmt.Errorf("devmapper: Can't mount device %v as it has been marked for deferred deletion", info.Hash) 2261 } 2262 2263 info.lock.Lock() 2264 defer info.lock.Unlock() 2265 2266 devices.Lock() 2267 defer devices.Unlock() 2268 2269 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2270 return fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2271 } 2272 2273 fstype, err := ProbeFsType(info.DevName()) 2274 if err != nil { 2275 return err 2276 } 2277 2278 options := "" 2279 2280 if fstype == "xfs" { 2281 // XFS needs nouuid or it can't mount filesystems with the same fs 2282 options = joinMountOptions(options, "nouuid") 2283 } 2284 2285 options = joinMountOptions(options, devices.mountOptions) 2286 options = joinMountOptions(options, label.FormatMountLabel("", mountLabel)) 2287 2288 if err := mount.Mount(info.DevName(), path, fstype, options); err != nil { 2289 return fmt.Errorf("devmapper: Error mounting '%s' on '%s': %s", info.DevName(), path, err) 2290 } 2291 2292 return nil 2293 } 2294 2295 // UnmountDevice unmounts the device and removes it from hash. 
2296 func (devices *DeviceSet) UnmountDevice(hash, mountPath string) error { 2297 logrus.Debugf("devmapper: UnmountDevice(hash=%s)", hash) 2298 defer logrus.Debugf("devmapper: UnmountDevice(hash=%s) END", hash) 2299 2300 info, err := devices.lookupDeviceWithLock(hash) 2301 if err != nil { 2302 return err 2303 } 2304 2305 info.lock.Lock() 2306 defer info.lock.Unlock() 2307 2308 devices.Lock() 2309 defer devices.Unlock() 2310 2311 logrus.Debugf("devmapper: Unmount(%s)", mountPath) 2312 if err := syscall.Unmount(mountPath, syscall.MNT_DETACH); err != nil { 2313 return err 2314 } 2315 logrus.Debug("devmapper: Unmount done") 2316 2317 if err := devices.deactivateDevice(info); err != nil { 2318 return err 2319 } 2320 2321 return nil 2322 } 2323 2324 // HasDevice returns true if the device metadata exists. 2325 func (devices *DeviceSet) HasDevice(hash string) bool { 2326 info, _ := devices.lookupDeviceWithLock(hash) 2327 return info != nil 2328 } 2329 2330 // List returns a list of device ids. 
2331 func (devices *DeviceSet) List() []string { 2332 devices.Lock() 2333 defer devices.Unlock() 2334 2335 ids := make([]string, len(devices.Devices)) 2336 i := 0 2337 for k := range devices.Devices { 2338 ids[i] = k 2339 i++ 2340 } 2341 return ids 2342 } 2343 2344 func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) { 2345 var params string 2346 _, sizeInSectors, _, params, err = devicemapper.GetStatus(devName) 2347 if err != nil { 2348 return 2349 } 2350 if _, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector); err == nil { 2351 return 2352 } 2353 return 2354 } 2355 2356 // GetDeviceStatus provides size, mapped sectors 2357 func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) { 2358 info, err := devices.lookupDeviceWithLock(hash) 2359 if err != nil { 2360 return nil, err 2361 } 2362 2363 info.lock.Lock() 2364 defer info.lock.Unlock() 2365 2366 devices.Lock() 2367 defer devices.Unlock() 2368 2369 status := &DevStatus{ 2370 DeviceID: info.DeviceID, 2371 Size: info.Size, 2372 TransactionID: info.TransactionID, 2373 } 2374 2375 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2376 return nil, fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2377 } 2378 2379 sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName()) 2380 2381 if err != nil { 2382 return nil, err 2383 } 2384 2385 status.SizeInSectors = sizeInSectors 2386 status.MappedSectors = mappedSectors 2387 status.HighestMappedSector = highestMappedSector 2388 2389 return status, nil 2390 } 2391 2392 func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionID, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) { 2393 var params string 2394 if _, totalSizeInSectors, _, params, err = devicemapper.GetStatus(devices.getPoolName()); err == nil { 2395 _, err = fmt.Sscanf(params, "%d %d/%d 
%d/%d", &transactionID, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal) 2396 } 2397 return 2398 } 2399 2400 // DataDevicePath returns the path to the data storage for this deviceset, 2401 // regardless of loopback or block device 2402 func (devices *DeviceSet) DataDevicePath() string { 2403 return devices.dataDevice 2404 } 2405 2406 // MetadataDevicePath returns the path to the metadata storage for this deviceset, 2407 // regardless of loopback or block device 2408 func (devices *DeviceSet) MetadataDevicePath() string { 2409 return devices.metadataDevice 2410 } 2411 2412 func (devices *DeviceSet) getUnderlyingAvailableSpace(loopFile string) (uint64, error) { 2413 buf := new(syscall.Statfs_t) 2414 if err := syscall.Statfs(loopFile, buf); err != nil { 2415 logrus.Warnf("devmapper: Couldn't stat loopfile filesystem %v: %v", loopFile, err) 2416 return 0, err 2417 } 2418 return buf.Bfree * uint64(buf.Bsize), nil 2419 } 2420 2421 func (devices *DeviceSet) isRealFile(loopFile string) (bool, error) { 2422 if loopFile != "" { 2423 fi, err := os.Stat(loopFile) 2424 if err != nil { 2425 logrus.Warnf("devmapper: Couldn't stat loopfile %v: %v", loopFile, err) 2426 return false, err 2427 } 2428 return fi.Mode().IsRegular(), nil 2429 } 2430 return false, nil 2431 } 2432 2433 // Status returns the current status of this deviceset 2434 func (devices *DeviceSet) Status() *Status { 2435 devices.Lock() 2436 defer devices.Unlock() 2437 2438 status := &Status{} 2439 2440 status.PoolName = devices.getPoolName() 2441 status.DataFile = devices.DataDevicePath() 2442 status.DataLoopback = devices.dataLoopFile 2443 status.MetadataFile = devices.MetadataDevicePath() 2444 status.MetadataLoopback = devices.metadataLoopFile 2445 status.UdevSyncSupported = devicemapper.UdevSyncSupported() 2446 status.DeferredRemoveEnabled = devices.deferredRemove 2447 status.DeferredDeleteEnabled = devices.deferredDelete 2448 status.DeferredDeletedDeviceCount = devices.nrDeletedDevices 2449 
status.BaseDeviceSize = devices.getBaseDeviceSize() 2450 status.BaseDeviceFS = devices.getBaseDeviceFS() 2451 2452 totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 2453 if err == nil { 2454 // Convert from blocks to bytes 2455 blockSizeInSectors := totalSizeInSectors / dataTotal 2456 2457 status.Data.Used = dataUsed * blockSizeInSectors * 512 2458 status.Data.Total = dataTotal * blockSizeInSectors * 512 2459 status.Data.Available = status.Data.Total - status.Data.Used 2460 2461 // metadata blocks are always 4k 2462 status.Metadata.Used = metadataUsed * 4096 2463 status.Metadata.Total = metadataTotal * 4096 2464 status.Metadata.Available = status.Metadata.Total - status.Metadata.Used 2465 2466 status.SectorSize = blockSizeInSectors * 512 2467 2468 if check, _ := devices.isRealFile(devices.dataLoopFile); check { 2469 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.dataLoopFile) 2470 if err == nil && actualSpace < status.Data.Available { 2471 status.Data.Available = actualSpace 2472 } 2473 } 2474 2475 if check, _ := devices.isRealFile(devices.metadataLoopFile); check { 2476 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.metadataLoopFile) 2477 if err == nil && actualSpace < status.Metadata.Available { 2478 status.Metadata.Available = actualSpace 2479 } 2480 } 2481 2482 minFreeData := (dataTotal * uint64(devices.minFreeSpacePercent)) / 100 2483 status.MinFreeSpace = minFreeData * blockSizeInSectors * 512 2484 } 2485 2486 return status 2487 } 2488 2489 // Status returns the current status of this deviceset 2490 func (devices *DeviceSet) exportDeviceMetadata(hash string) (*deviceMetadata, error) { 2491 info, err := devices.lookupDeviceWithLock(hash) 2492 if err != nil { 2493 return nil, err 2494 } 2495 2496 info.lock.Lock() 2497 defer info.lock.Unlock() 2498 2499 metadata := &deviceMetadata{info.DeviceID, info.Size, info.Name()} 2500 return metadata, nil 2501 } 2502 2503 // NewDeviceSet 
creates the device set based on the options provided. 2504 func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps []idtools.IDMap) (*DeviceSet, error) { 2505 devicemapper.SetDevDir("/dev") 2506 2507 devices := &DeviceSet{ 2508 root: root, 2509 metaData: metaData{Devices: make(map[string]*devInfo)}, 2510 dataLoopbackSize: defaultDataLoopbackSize, 2511 metaDataLoopbackSize: defaultMetaDataLoopbackSize, 2512 baseFsSize: defaultBaseFsSize, 2513 overrideUdevSyncCheck: defaultUdevSyncOverride, 2514 doBlkDiscard: true, 2515 thinpBlockSize: defaultThinpBlockSize, 2516 deviceIDMap: make([]byte, deviceIDMapSz), 2517 deletionWorkerTicker: time.NewTicker(time.Second * 30), 2518 uidMaps: uidMaps, 2519 gidMaps: gidMaps, 2520 minFreeSpacePercent: defaultMinFreeSpacePercent, 2521 } 2522 2523 foundBlkDiscard := false 2524 for _, option := range options { 2525 key, val, err := parsers.ParseKeyValueOpt(option) 2526 if err != nil { 2527 return nil, err 2528 } 2529 key = strings.ToLower(key) 2530 switch key { 2531 case "dm.basesize": 2532 size, err := units.RAMInBytes(val) 2533 if err != nil { 2534 return nil, err 2535 } 2536 userBaseSize = true 2537 devices.baseFsSize = uint64(size) 2538 case "dm.loopdatasize": 2539 size, err := units.RAMInBytes(val) 2540 if err != nil { 2541 return nil, err 2542 } 2543 devices.dataLoopbackSize = size 2544 case "dm.loopmetadatasize": 2545 size, err := units.RAMInBytes(val) 2546 if err != nil { 2547 return nil, err 2548 } 2549 devices.metaDataLoopbackSize = size 2550 case "dm.fs": 2551 if val != "ext4" && val != "xfs" { 2552 return nil, fmt.Errorf("devmapper: Unsupported filesystem %s\n", val) 2553 } 2554 devices.filesystem = val 2555 case "dm.mkfsarg": 2556 devices.mkfsArgs = append(devices.mkfsArgs, val) 2557 case "dm.mountopt": 2558 devices.mountOptions = joinMountOptions(devices.mountOptions, val) 2559 case "dm.metadatadev": 2560 devices.metadataDevice = val 2561 case "dm.datadev": 2562 devices.dataDevice = val 2563 case 
"dm.thinpooldev": 2564 devices.thinPoolDevice = strings.TrimPrefix(val, "/dev/mapper/") 2565 case "dm.blkdiscard": 2566 foundBlkDiscard = true 2567 devices.doBlkDiscard, err = strconv.ParseBool(val) 2568 if err != nil { 2569 return nil, err 2570 } 2571 case "dm.blocksize": 2572 size, err := units.RAMInBytes(val) 2573 if err != nil { 2574 return nil, err 2575 } 2576 // convert to 512b sectors 2577 devices.thinpBlockSize = uint32(size) >> 9 2578 case "dm.override_udev_sync_check": 2579 devices.overrideUdevSyncCheck, err = strconv.ParseBool(val) 2580 if err != nil { 2581 return nil, err 2582 } 2583 2584 case "dm.use_deferred_removal": 2585 enableDeferredRemoval, err = strconv.ParseBool(val) 2586 if err != nil { 2587 return nil, err 2588 } 2589 2590 case "dm.use_deferred_deletion": 2591 enableDeferredDeletion, err = strconv.ParseBool(val) 2592 if err != nil { 2593 return nil, err 2594 } 2595 2596 case "dm.min_free_space": 2597 if !strings.HasSuffix(val, "%") { 2598 return nil, fmt.Errorf("devmapper: Option dm.min_free_space requires %% suffix") 2599 } 2600 2601 valstring := strings.TrimSuffix(val, "%") 2602 minFreeSpacePercent, err := strconv.ParseUint(valstring, 10, 32) 2603 if err != nil { 2604 return nil, err 2605 } 2606 2607 if minFreeSpacePercent >= 100 { 2608 return nil, fmt.Errorf("devmapper: Invalid value %v for option dm.min_free_space", val) 2609 } 2610 2611 devices.minFreeSpacePercent = uint32(minFreeSpacePercent) 2612 default: 2613 return nil, fmt.Errorf("devmapper: Unknown option %s\n", key) 2614 } 2615 } 2616 2617 // By default, don't do blk discard hack on raw devices, its rarely useful and is expensive 2618 if !foundBlkDiscard && (devices.dataDevice != "" || devices.thinPoolDevice != "") { 2619 devices.doBlkDiscard = false 2620 } 2621 2622 if err := devices.initDevmapper(doInit); err != nil { 2623 return nil, err 2624 } 2625 2626 return devices, nil 2627 }