// Source: github.com/hustcat/docker@v1.3.3-0.20160314103604-901c67a8eeab/daemon/graphdriver/devmapper/deviceset.go

// +build linux

package devmapper

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Sirupsen/logrus"

	"github.com/docker/docker/daemon/graphdriver"
	"github.com/docker/docker/pkg/devicemapper"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/loopback"
	"github.com/docker/docker/pkg/mount"
	"github.com/docker/docker/pkg/parsers"
	"github.com/docker/go-units"

	"github.com/opencontainers/runc/libcontainer/label"
)

// Package-level defaults and feature switches for the device set.
var (
	defaultDataLoopbackSize     int64  = 100 * 1024 * 1024 * 1024 // 100 GiB
	defaultMetaDataLoopbackSize int64  = 2 * 1024 * 1024 * 1024   // 2 GiB
	defaultBaseFsSize           uint64 = 10 * 1024 * 1024 * 1024  // 10 GiB
	defaultThinpBlockSize       uint32 = 128                      // 64K = 128 512b sectors
	defaultUdevSyncOverride            = false
	maxDeviceID                        = 0xffffff // 24 bit, pool limit
	// deviceIDMapSz is the size in bytes of a bitmap holding one bit per
	// possible device ID (see markDeviceIDUsed / isDeviceIDFree).
	deviceIDMapSz = (maxDeviceID + 1) / 8
	// We retry device removal so many times that even error messages
	// would fill up the console during normal operation. So only log Fatal
	// messages by default.
	logLevel                     = devicemapper.LogLevelFatal
	driverDeferredRemovalSupport = false
	enableDeferredRemoval        = false
	enableDeferredDeletion       = false
	userBaseSize                 = false
)

// Names of the bookkeeping files kept in the metadata directory next to the
// per-device files; deviceFileWalkFunction skips both when loading devices.
const deviceSetMetaFile string = "deviceset-metadata"
const transactionMetaFile string = "transaction-metadata"

// transaction records the ID of the currently open transaction together with
// the hash and device ID it concerns. It is embedded in DeviceSet.
type transaction struct {
	OpenTransactionID uint64 `json:"open_transaction_id"`
	DeviceIDHash      string `json:"device_hash"`
	DeviceID          int    `json:"device_id"`
}

// devInfo is the in-memory record of one thin device. The JSON-tagged fields
// are what saveMetadata writes to the device's metadata file; Hash is not
// serialized because it is the file name itself ("base" for the empty hash).
type devInfo struct {
	Hash          string `json:"-"`
	DeviceID      int    `json:"device_id"`
	Size          uint64 `json:"size"`
	TransactionID uint64 `json:"transaction_id"`
	Initialized   bool   `json:"initialized"`
	Deleted       bool   `json:"deleted"`
	devices       *DeviceSet

	mountCount int
	mountPath  string

	// The global DeviceSet lock guarantees that we serialize all
	// the calls to libdevmapper (which is not threadsafe), but we
	// sometimes release that lock while sleeping. In that case
	// this per-device lock is still held, protecting against
	// other accesses to the device that we're doing the wait on.
	//
	// WARNING: In order to avoid AB-BA deadlocks when releasing
	// the global lock while holding the per-device locks, all
	// device locks must be acquired *before* the global DeviceSet
	// lock, and multiple device locks should be acquired parent
	// before child.
	lock sync.Mutex
}

// metaData is the hash -> device table. It is embedded in DeviceSet and is
// also the shape decoded from the legacy metadata file in migrateOldMetaData.
type metaData struct {
	Devices map[string]*devInfo `json:"Devices"`
}

// DeviceSet holds information about list of devices
type DeviceSet struct {
	metaData      `json:"-"`
	sync.Mutex    `json:"-"` // Protects all fields of DeviceSet and serializes calls into libdevmapper
	root          string
	devicePrefix  string
	TransactionID uint64 `json:"-"`
	NextDeviceID  int    `json:"next_device_id"`
	deviceIDMap   []byte // bitmap, one bit per device ID; a set bit means "in use"

	// Options
	dataLoopbackSize      int64
	metaDataLoopbackSize  int64
	baseFsSize            uint64
	filesystem            string
	mountOptions          string
	mkfsArgs              []string
	dataDevice            string // block or loop dev
	dataLoopFile          string // loopback file, if used
	metadataDevice        string // block or loop dev
	metadataLoopFile      string // loopback file, if used
	doBlkDiscard          bool
	thinpBlockSize        uint32
	thinPoolDevice        string
	transaction           `json:"-"`
	overrideUdevSyncCheck bool
	deferredRemove        bool   // use deferred removal
	deferredDelete        bool   // use deferred deletion
	BaseDeviceUUID        string // save UUID of base device
	BaseDeviceFilesystem  string // save filesystem of base device
	nrDeletedDevices      uint   // number of deleted devices
	deletionWorkerTicker  *time.Ticker
	uidMaps               []idtools.IDMap
	gidMaps               []idtools.IDMap
}

// DiskUsage contains information about disk usage and is used when reporting Status of a device.
type DiskUsage struct {
	// Used bytes on the disk.
	Used uint64
	// Total bytes on the disk.
	Total uint64
	// Available bytes on the disk.
	Available uint64
}

// Status holds the pool-wide information reported about a device set.
type Status struct {
	// PoolName is the name of the data pool.
	PoolName string
	// DataFile is the actual block device for data.
	DataFile string
	// DataLoopback is the loopback file for data, if used.
	DataLoopback string
	// MetadataFile is the actual block device for metadata.
	MetadataFile string
	// MetadataLoopback is the loopback file, if used.
	MetadataLoopback string
	// Data is the disk used for data.
	Data DiskUsage
	// Metadata is the disk used for meta data.
	Metadata DiskUsage
	// BaseDeviceSize is the base size of container and image.
	BaseDeviceSize uint64
	// BaseDeviceFS is backing filesystem.
	BaseDeviceFS string
	// SectorSize is the size of a sector.
	// NOTE(review): units are not visible in this part of the file — confirm.
	SectorSize uint64
	// UdevSyncSupported is true if sync is supported.
	UdevSyncSupported bool
	// DeferredRemoveEnabled is true if deferred removal is enabled.
	DeferredRemoveEnabled bool
	// True if deferred deletion is enabled. This is different from
	// deferred removal. "removal" means that device mapper device is
	// deactivated. Thin device is still in thin pool and can be activated
	// again. But "deletion" means that thin device will be deleted from
	// thin pool and it can't be activated again.
	DeferredDeleteEnabled bool
	// DeferredDeletedDeviceCount is the number of devices marked deleted.
	// NOTE(review): presumably mirrors DeviceSet.nrDeletedDevices — confirm.
	DeferredDeletedDeviceCount uint
}

// Structure used to export image/container metadata in docker inspect.
type deviceMetadata struct {
	deviceID   int
	deviceSize uint64 // size in bytes
	deviceName string // Device name as used during activation
}

// DevStatus returns information about device mounted containing its id, size and sector information.
type DevStatus struct {
	// DeviceID is the id of the device.
	DeviceID int
	// Size is the size of the filesystem.
	Size uint64
	// TransactionID is a unique integer per device set used to identify an operation on the file system, this number is incremental.
	TransactionID uint64
	// SizeInSectors indicates the size of the sectors allocated.
	SizeInSectors uint64
	// MappedSectors indicates number of mapped sectors.
	MappedSectors uint64
	// HighestMappedSector is the pointer to the highest mapped sector.
	HighestMappedSector uint64
}

// getDevName returns the /dev/mapper path of the device-mapper device name.
func getDevName(name string) string {
	return "/dev/mapper/" + name
}

// Name returns the device-mapper name of the device: "<devicePrefix>-<hash>",
// with "base" standing in for the empty hash.
func (info *devInfo) Name() string {
	hash := info.Hash
	if hash == "" {
		hash = "base"
	}
	return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash)
}

// DevName returns the /dev/mapper path of the device.
func (info *devInfo) DevName() string {
	return getDevName(info.Name())
}

// loopbackDir returns <root>/devicemapper, the directory holding loopback files.
func (devices *DeviceSet) loopbackDir() string {
	return path.Join(devices.root, "devicemapper")
}

// metadataDir returns <root>/metadata, the directory holding per-device
// metadata files and the device-set/transaction bookkeeping files.
func (devices *DeviceSet) metadataDir() string {
	return path.Join(devices.root, "metadata")
}

// metadataFile returns the metadata file path for info; the file is named
// after the device hash, or "base" when the hash is empty.
func (devices *DeviceSet) metadataFile(info *devInfo) string {
	file := info.Hash
	if file == "" {
		file = "base"
	}
	return path.Join(devices.metadataDir(), file)
}

// transactionMetaFile returns the path of the transaction metadata file.
func (devices *DeviceSet) transactionMetaFile() string {
	return path.Join(devices.metadataDir(), transactionMetaFile)
}

// deviceSetMetaFile returns the path of the device-set metadata file.
func (devices *DeviceSet) deviceSetMetaFile() string {
	return path.Join(devices.metadataDir(), deviceSetMetaFile)
}

// oldMetadataFile returns the path of the legacy single-file metadata
// (<root>/devicemapper/json) consumed by migrateOldMetaData.
func (devices *DeviceSet) oldMetadataFile() string {
	return path.Join(devices.loopbackDir(), "json")
}

// getPoolName returns the configured thin pool device name, or
// "<devicePrefix>-pool" when none was configured.
func (devices *DeviceSet) getPoolName() string {
	if devices.thinPoolDevice == "" {
		return devices.devicePrefix + "-pool"
	}
	return devices.thinPoolDevice
}

// getPoolDevName returns the /dev/mapper path of the thin pool.
func (devices *DeviceSet) getPoolDevName() string {
	return getDevName(devices.getPoolName())
}

// hasImage reports whether <root>/devicemapper/<name> can be stat'ed. Any
// stat error, not only "does not exist", yields false.
func (devices *DeviceSet) hasImage(name string) bool {
	dirname := devices.loopbackDir()
	filename := path.Join(dirname, name)

	_, err := os.Stat(filename)
	return err == nil
}

// ensureImage creates a sparse file of <size> bytes at the path
// <root>/devicemapper/<name>.
// If the file already exists and new size is larger than its current size, it grows to the new size.
// An existing file larger than <size> is never shrunk; a warning is logged instead.
// Either way it returns the full path.
262 func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) { 263 dirname := devices.loopbackDir() 264 filename := path.Join(dirname, name) 265 266 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 267 if err != nil { 268 return "", err 269 } 270 if err := idtools.MkdirAllAs(dirname, 0700, uid, gid); err != nil && !os.IsExist(err) { 271 return "", err 272 } 273 274 if fi, err := os.Stat(filename); err != nil { 275 if !os.IsNotExist(err) { 276 return "", err 277 } 278 logrus.Debugf("devmapper: Creating loopback file %s for device-manage use", filename) 279 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 280 if err != nil { 281 return "", err 282 } 283 defer file.Close() 284 285 if err := file.Truncate(size); err != nil { 286 return "", err 287 } 288 } else { 289 if fi.Size() < size { 290 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 291 if err != nil { 292 return "", err 293 } 294 defer file.Close() 295 if err := file.Truncate(size); err != nil { 296 return "", fmt.Errorf("devmapper: Unable to grow loopback file %s: %v", filename, err) 297 } 298 } else if fi.Size() > size { 299 logrus.Warnf("devmapper: Can't shrink loopback file %s", filename) 300 } 301 } 302 return filename, nil 303 } 304 305 func (devices *DeviceSet) allocateTransactionID() uint64 { 306 devices.OpenTransactionID = devices.TransactionID + 1 307 return devices.OpenTransactionID 308 } 309 310 func (devices *DeviceSet) updatePoolTransactionID() error { 311 if err := devicemapper.SetTransactionID(devices.getPoolDevName(), devices.TransactionID, devices.OpenTransactionID); err != nil { 312 return fmt.Errorf("devmapper: Error setting devmapper transaction ID: %s", err) 313 } 314 devices.TransactionID = devices.OpenTransactionID 315 return nil 316 } 317 318 func (devices *DeviceSet) removeMetadata(info *devInfo) error { 319 if err := os.RemoveAll(devices.metadataFile(info)); err != nil { 320 return fmt.Errorf("devmapper: Error 
removing metadata file %s: %s", devices.metadataFile(info), err) 321 } 322 return nil 323 } 324 325 // Given json data and file path, write it to disk 326 func (devices *DeviceSet) writeMetaFile(jsonData []byte, filePath string) error { 327 tmpFile, err := ioutil.TempFile(devices.metadataDir(), ".tmp") 328 if err != nil { 329 return fmt.Errorf("devmapper: Error creating metadata file: %s", err) 330 } 331 332 n, err := tmpFile.Write(jsonData) 333 if err != nil { 334 return fmt.Errorf("devmapper: Error writing metadata to %s: %s", tmpFile.Name(), err) 335 } 336 if n < len(jsonData) { 337 return io.ErrShortWrite 338 } 339 if err := tmpFile.Sync(); err != nil { 340 return fmt.Errorf("devmapper: Error syncing metadata file %s: %s", tmpFile.Name(), err) 341 } 342 if err := tmpFile.Close(); err != nil { 343 return fmt.Errorf("devmapper: Error closing metadata file %s: %s", tmpFile.Name(), err) 344 } 345 if err := os.Rename(tmpFile.Name(), filePath); err != nil { 346 return fmt.Errorf("devmapper: Error committing metadata file %s: %s", tmpFile.Name(), err) 347 } 348 349 return nil 350 } 351 352 func (devices *DeviceSet) saveMetadata(info *devInfo) error { 353 jsonData, err := json.Marshal(info) 354 if err != nil { 355 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 356 } 357 if err := devices.writeMetaFile(jsonData, devices.metadataFile(info)); err != nil { 358 return err 359 } 360 return nil 361 } 362 363 func (devices *DeviceSet) markDeviceIDUsed(deviceID int) { 364 var mask byte 365 i := deviceID % 8 366 mask = 1 << uint(i) 367 devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] | mask 368 } 369 370 func (devices *DeviceSet) markDeviceIDFree(deviceID int) { 371 var mask byte 372 i := deviceID % 8 373 mask = ^(1 << uint(i)) 374 devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] & mask 375 } 376 377 func (devices *DeviceSet) isDeviceIDFree(deviceID int) bool { 378 var mask byte 379 i := deviceID % 8 380 mask = (1 << 
uint(i)) 381 if (devices.deviceIDMap[deviceID/8] & mask) != 0 { 382 return false 383 } 384 return true 385 } 386 387 // Should be called with devices.Lock() held. 388 func (devices *DeviceSet) lookupDevice(hash string) (*devInfo, error) { 389 info := devices.Devices[hash] 390 if info == nil { 391 info = devices.loadMetadata(hash) 392 if info == nil { 393 return nil, fmt.Errorf("devmapper: Unknown device %s", hash) 394 } 395 396 devices.Devices[hash] = info 397 } 398 return info, nil 399 } 400 401 func (devices *DeviceSet) lookupDeviceWithLock(hash string) (*devInfo, error) { 402 devices.Lock() 403 defer devices.Unlock() 404 info, err := devices.lookupDevice(hash) 405 return info, err 406 } 407 408 // This function relies on that device hash map has been loaded in advance. 409 // Should be called with devices.Lock() held. 410 func (devices *DeviceSet) constructDeviceIDMap() { 411 logrus.Debugf("devmapper: constructDeviceIDMap()") 412 defer logrus.Debugf("devmapper: constructDeviceIDMap() END") 413 414 for _, info := range devices.Devices { 415 devices.markDeviceIDUsed(info.DeviceID) 416 logrus.Debugf("devmapper: Added deviceId=%d to DeviceIdMap", info.DeviceID) 417 } 418 } 419 420 func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo) error { 421 422 // Skip some of the meta files which are not device files. 
423 if strings.HasSuffix(finfo.Name(), ".migrated") { 424 logrus.Debugf("devmapper: Skipping file %s", path) 425 return nil 426 } 427 428 if strings.HasPrefix(finfo.Name(), ".") { 429 logrus.Debugf("devmapper: Skipping file %s", path) 430 return nil 431 } 432 433 if finfo.Name() == deviceSetMetaFile { 434 logrus.Debugf("devmapper: Skipping file %s", path) 435 return nil 436 } 437 438 if finfo.Name() == transactionMetaFile { 439 logrus.Debugf("devmapper: Skipping file %s", path) 440 return nil 441 } 442 443 logrus.Debugf("devmapper: Loading data for file %s", path) 444 445 hash := finfo.Name() 446 if hash == "base" { 447 hash = "" 448 } 449 450 // Include deleted devices also as cleanup delete device logic 451 // will go through it and see if there are any deleted devices. 452 if _, err := devices.lookupDevice(hash); err != nil { 453 return fmt.Errorf("devmapper: Error looking up device %s:%v", hash, err) 454 } 455 456 return nil 457 } 458 459 func (devices *DeviceSet) loadDeviceFilesOnStart() error { 460 logrus.Debugf("devmapper: loadDeviceFilesOnStart()") 461 defer logrus.Debugf("devmapper: loadDeviceFilesOnStart() END") 462 463 var scan = func(path string, info os.FileInfo, err error) error { 464 if err != nil { 465 logrus.Debugf("devmapper: Can't walk the file %s", path) 466 return nil 467 } 468 469 // Skip any directories 470 if info.IsDir() { 471 return nil 472 } 473 474 return devices.deviceFileWalkFunction(path, info) 475 } 476 477 return filepath.Walk(devices.metadataDir(), scan) 478 } 479 480 // Should be called with devices.Lock() held. 
481 func (devices *DeviceSet) unregisterDevice(id int, hash string) error { 482 logrus.Debugf("devmapper: unregisterDevice(%v, %v)", id, hash) 483 info := &devInfo{ 484 Hash: hash, 485 DeviceID: id, 486 } 487 488 delete(devices.Devices, hash) 489 490 if err := devices.removeMetadata(info); err != nil { 491 logrus.Debugf("devmapper: Error removing metadata: %s", err) 492 return err 493 } 494 495 return nil 496 } 497 498 // Should be called with devices.Lock() held. 499 func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, transactionID uint64) (*devInfo, error) { 500 logrus.Debugf("devmapper: registerDevice(%v, %v)", id, hash) 501 info := &devInfo{ 502 Hash: hash, 503 DeviceID: id, 504 Size: size, 505 TransactionID: transactionID, 506 Initialized: false, 507 devices: devices, 508 } 509 510 devices.Devices[hash] = info 511 512 if err := devices.saveMetadata(info); err != nil { 513 // Try to remove unused device 514 delete(devices.Devices, hash) 515 return nil, err 516 } 517 518 return info, nil 519 } 520 521 func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bool) error { 522 logrus.Debugf("devmapper: activateDeviceIfNeeded(%v)", info.Hash) 523 524 if info.Deleted && !ignoreDeleted { 525 return fmt.Errorf("devmapper: Can't activate device %v as it is marked for deletion", info.Hash) 526 } 527 528 // Make sure deferred removal on device is canceled, if one was 529 // scheduled. 
530 if err := devices.cancelDeferredRemoval(info); err != nil { 531 return fmt.Errorf("devmapper: Device Deferred Removal Cancellation Failed: %s", err) 532 } 533 534 if devinfo, _ := devicemapper.GetInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 { 535 return nil 536 } 537 538 return devicemapper.ActivateDevice(devices.getPoolDevName(), info.Name(), info.DeviceID, info.Size) 539 } 540 541 // Return true only if kernel supports xfs and mkfs.xfs is available 542 func xfsSupported() bool { 543 // Make sure mkfs.xfs is available 544 if _, err := exec.LookPath("mkfs.xfs"); err != nil { 545 return false 546 } 547 548 // Check if kernel supports xfs filesystem or not. 549 exec.Command("modprobe", "xfs").Run() 550 551 f, err := os.Open("/proc/filesystems") 552 if err != nil { 553 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 554 return false 555 } 556 defer f.Close() 557 558 s := bufio.NewScanner(f) 559 for s.Scan() { 560 if strings.HasSuffix(s.Text(), "\txfs") { 561 return true 562 } 563 } 564 565 if err := s.Err(); err != nil { 566 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 567 } 568 return false 569 } 570 571 func determineDefaultFS() string { 572 if xfsSupported() { 573 return "xfs" 574 } 575 576 logrus.Warn("devmapper: XFS is not supported in your system. Either the kernel doesn't support it or mkfs.xfs is not in your PATH. 
Defaulting to ext4 filesystem") 577 return "ext4" 578 } 579 580 func (devices *DeviceSet) createFilesystem(info *devInfo) (err error) { 581 devname := info.DevName() 582 583 args := []string{} 584 for _, arg := range devices.mkfsArgs { 585 args = append(args, arg) 586 } 587 588 args = append(args, devname) 589 590 if devices.filesystem == "" { 591 devices.filesystem = determineDefaultFS() 592 } 593 if err := devices.saveBaseDeviceFilesystem(devices.filesystem); err != nil { 594 return err 595 } 596 597 logrus.Infof("devmapper: Creating filesystem %s on device %s", devices.filesystem, info.Name()) 598 defer func() { 599 if err != nil { 600 logrus.Infof("devmapper: Error while creating filesystem %s on device %s: %v", devices.filesystem, info.Name(), err) 601 } else { 602 logrus.Infof("devmapper: Successfully created filesystem %s on device %s", devices.filesystem, info.Name()) 603 } 604 }() 605 606 switch devices.filesystem { 607 case "xfs": 608 err = exec.Command("mkfs.xfs", args...).Run() 609 case "ext4": 610 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0,lazy_journal_init=0"}, args...)...).Run() 611 if err != nil { 612 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0"}, args...)...).Run() 613 } 614 if err != nil { 615 return err 616 } 617 err = exec.Command("tune2fs", append([]string{"-c", "-1", "-i", "0"}, devname)...).Run() 618 default: 619 err = fmt.Errorf("devmapper: Unsupported filesystem type %s", devices.filesystem) 620 } 621 return 622 } 623 624 func (devices *DeviceSet) migrateOldMetaData() error { 625 // Migrate old metadata file 626 jsonData, err := ioutil.ReadFile(devices.oldMetadataFile()) 627 if err != nil && !os.IsNotExist(err) { 628 return err 629 } 630 631 if jsonData != nil { 632 m := metaData{Devices: make(map[string]*devInfo)} 633 634 if err := json.Unmarshal(jsonData, &m); err != nil { 635 return err 636 } 637 638 for hash, info := range m.Devices { 639 info.Hash = hash 
640 devices.saveMetadata(info) 641 } 642 if err := os.Rename(devices.oldMetadataFile(), devices.oldMetadataFile()+".migrated"); err != nil { 643 return err 644 } 645 646 } 647 648 return nil 649 } 650 651 // Cleanup deleted devices. It assumes that all the devices have been 652 // loaded in the hash table. 653 func (devices *DeviceSet) cleanupDeletedDevices() error { 654 devices.Lock() 655 656 // If there are no deleted devices, there is nothing to do. 657 if devices.nrDeletedDevices == 0 { 658 devices.Unlock() 659 return nil 660 } 661 662 var deletedDevices []*devInfo 663 664 for _, info := range devices.Devices { 665 if !info.Deleted { 666 continue 667 } 668 logrus.Debugf("devmapper: Found deleted device %s.", info.Hash) 669 deletedDevices = append(deletedDevices, info) 670 } 671 672 // Delete the deleted devices. DeleteDevice() first takes the info lock 673 // and then devices.Lock(). So drop it to avoid deadlock. 674 devices.Unlock() 675 676 for _, info := range deletedDevices { 677 // This will again try deferred deletion. 678 if err := devices.DeleteDevice(info.Hash, false); err != nil { 679 logrus.Warnf("devmapper: Deletion of device %s, device_id=%v failed:%v", info.Hash, info.DeviceID, err) 680 } 681 } 682 683 return nil 684 } 685 686 func (devices *DeviceSet) countDeletedDevices() { 687 for _, info := range devices.Devices { 688 if !info.Deleted { 689 continue 690 } 691 devices.nrDeletedDevices++ 692 } 693 } 694 695 func (devices *DeviceSet) startDeviceDeletionWorker() { 696 // Deferred deletion is not enabled. Don't do anything. 
697 if !devices.deferredDelete { 698 return 699 } 700 701 logrus.Debugf("devmapper: Worker to cleanup deleted devices started") 702 for range devices.deletionWorkerTicker.C { 703 devices.cleanupDeletedDevices() 704 } 705 } 706 707 func (devices *DeviceSet) initMetaData() error { 708 devices.Lock() 709 defer devices.Unlock() 710 711 if err := devices.migrateOldMetaData(); err != nil { 712 return err 713 } 714 715 _, transactionID, _, _, _, _, err := devices.poolStatus() 716 if err != nil { 717 return err 718 } 719 720 devices.TransactionID = transactionID 721 722 if err := devices.loadDeviceFilesOnStart(); err != nil { 723 return fmt.Errorf("devmapper: Failed to load device files:%v", err) 724 } 725 726 devices.constructDeviceIDMap() 727 devices.countDeletedDevices() 728 729 if err := devices.processPendingTransaction(); err != nil { 730 return err 731 } 732 733 // Start a goroutine to cleanup Deleted Devices 734 go devices.startDeviceDeletionWorker() 735 return nil 736 } 737 738 func (devices *DeviceSet) incNextDeviceID() { 739 // IDs are 24bit, so wrap around 740 devices.NextDeviceID = (devices.NextDeviceID + 1) & maxDeviceID 741 } 742 743 func (devices *DeviceSet) getNextFreeDeviceID() (int, error) { 744 devices.incNextDeviceID() 745 for i := 0; i <= maxDeviceID; i++ { 746 if devices.isDeviceIDFree(devices.NextDeviceID) { 747 devices.markDeviceIDUsed(devices.NextDeviceID) 748 return devices.NextDeviceID, nil 749 } 750 devices.incNextDeviceID() 751 } 752 753 return 0, fmt.Errorf("devmapper: Unable to find a free device ID") 754 } 755 756 func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) { 757 devices.Lock() 758 defer devices.Unlock() 759 760 deviceID, err := devices.getNextFreeDeviceID() 761 if err != nil { 762 return nil, err 763 } 764 765 if err := devices.openTransaction(hash, deviceID); err != nil { 766 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 767 
devices.markDeviceIDFree(deviceID) 768 return nil, err 769 } 770 771 for { 772 if err := devicemapper.CreateDevice(devices.getPoolDevName(), deviceID); err != nil { 773 if devicemapper.DeviceIDExists(err) { 774 // Device ID already exists. This should not 775 // happen. Now we have a mechanism to find 776 // a free device ID. So something is not right. 777 // Give a warning and continue. 778 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 779 deviceID, err = devices.getNextFreeDeviceID() 780 if err != nil { 781 return nil, err 782 } 783 // Save new device id into transaction 784 devices.refreshTransaction(deviceID) 785 continue 786 } 787 logrus.Debugf("devmapper: Error creating device: %s", err) 788 devices.markDeviceIDFree(deviceID) 789 return nil, err 790 } 791 break 792 } 793 794 logrus.Debugf("devmapper: Registering device (id %v) with FS size %v", deviceID, devices.baseFsSize) 795 info, err := devices.registerDevice(deviceID, hash, devices.baseFsSize, devices.OpenTransactionID) 796 if err != nil { 797 _ = devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 798 devices.markDeviceIDFree(deviceID) 799 return nil, err 800 } 801 802 if err := devices.closeTransaction(); err != nil { 803 devices.unregisterDevice(deviceID, hash) 804 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 805 devices.markDeviceIDFree(deviceID) 806 return nil, err 807 } 808 return info, nil 809 } 810 811 func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo) error { 812 deviceID, err := devices.getNextFreeDeviceID() 813 if err != nil { 814 return err 815 } 816 817 if err := devices.openTransaction(hash, deviceID); err != nil { 818 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 819 devices.markDeviceIDFree(deviceID) 820 return err 821 } 822 823 for { 824 if err := devicemapper.CreateSnapDevice(devices.getPoolDevName(), deviceID, baseInfo.Name(), 
baseInfo.DeviceID); err != nil { 825 if devicemapper.DeviceIDExists(err) { 826 // Device ID already exists. This should not 827 // happen. Now we have a mechanism to find 828 // a free device ID. So something is not right. 829 // Give a warning and continue. 830 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 831 deviceID, err = devices.getNextFreeDeviceID() 832 if err != nil { 833 return err 834 } 835 // Save new device id into transaction 836 devices.refreshTransaction(deviceID) 837 continue 838 } 839 logrus.Debugf("devmapper: Error creating snap device: %s", err) 840 devices.markDeviceIDFree(deviceID) 841 return err 842 } 843 break 844 } 845 846 if _, err := devices.registerDevice(deviceID, hash, baseInfo.Size, devices.OpenTransactionID); err != nil { 847 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 848 devices.markDeviceIDFree(deviceID) 849 logrus.Debugf("devmapper: Error registering device: %s", err) 850 return err 851 } 852 853 if err := devices.closeTransaction(); err != nil { 854 devices.unregisterDevice(deviceID, hash) 855 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 856 devices.markDeviceIDFree(deviceID) 857 return err 858 } 859 return nil 860 } 861 862 func (devices *DeviceSet) loadMetadata(hash string) *devInfo { 863 info := &devInfo{Hash: hash, devices: devices} 864 865 jsonData, err := ioutil.ReadFile(devices.metadataFile(info)) 866 if err != nil { 867 return nil 868 } 869 870 if err := json.Unmarshal(jsonData, &info); err != nil { 871 return nil 872 } 873 874 if info.DeviceID > maxDeviceID { 875 logrus.Errorf("devmapper: Ignoring Invalid DeviceId=%d", info.DeviceID) 876 return nil 877 } 878 879 return info 880 } 881 882 func getDeviceUUID(device string) (string, error) { 883 out, err := exec.Command("blkid", "-s", "UUID", "-o", "value", device).Output() 884 if err != nil { 885 return "", fmt.Errorf("devmapper: Failed to find uuid for device %s:%v", device, err) 886 } 
887 888 uuid := strings.TrimSuffix(string(out), "\n") 889 uuid = strings.TrimSpace(uuid) 890 logrus.Debugf("devmapper: UUID for device: %s is:%s", device, uuid) 891 return uuid, nil 892 } 893 894 func (devices *DeviceSet) getBaseDeviceSize() uint64 { 895 info, _ := devices.lookupDevice("") 896 if info == nil { 897 return 0 898 } 899 return info.Size 900 } 901 902 func (devices *DeviceSet) getBaseDeviceFS() string { 903 return devices.BaseDeviceFilesystem 904 } 905 906 func (devices *DeviceSet) verifyBaseDeviceUUIDFS(baseInfo *devInfo) error { 907 devices.Lock() 908 defer devices.Unlock() 909 910 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 911 return err 912 } 913 defer devices.deactivateDevice(baseInfo) 914 915 uuid, err := getDeviceUUID(baseInfo.DevName()) 916 if err != nil { 917 return err 918 } 919 920 if devices.BaseDeviceUUID != uuid { 921 return fmt.Errorf("devmapper: Current Base Device UUID:%s does not match with stored UUID:%s. Possibly using a different thin pool than last invocation", uuid, devices.BaseDeviceUUID) 922 } 923 924 if devices.BaseDeviceFilesystem == "" { 925 fsType, err := ProbeFsType(baseInfo.DevName()) 926 if err != nil { 927 return err 928 } 929 if err := devices.saveBaseDeviceFilesystem(fsType); err != nil { 930 return err 931 } 932 } 933 934 // If user specified a filesystem using dm.fs option and current 935 // file system of base image is not same, warn user that dm.fs 936 // will be ignored. 937 if devices.BaseDeviceFilesystem != devices.filesystem { 938 logrus.Warnf("devmapper: Base device already exists and has filesystem %s on it. 
User specified filesystem %s will be ignored.", devices.BaseDeviceFilesystem, devices.filesystem) 939 devices.filesystem = devices.BaseDeviceFilesystem 940 } 941 return nil 942 } 943 944 func (devices *DeviceSet) saveBaseDeviceFilesystem(fs string) error { 945 devices.BaseDeviceFilesystem = fs 946 return devices.saveDeviceSetMetaData() 947 } 948 949 func (devices *DeviceSet) saveBaseDeviceUUID(baseInfo *devInfo) error { 950 devices.Lock() 951 defer devices.Unlock() 952 953 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 954 return err 955 } 956 defer devices.deactivateDevice(baseInfo) 957 958 uuid, err := getDeviceUUID(baseInfo.DevName()) 959 if err != nil { 960 return err 961 } 962 963 devices.BaseDeviceUUID = uuid 964 return devices.saveDeviceSetMetaData() 965 } 966 967 func (devices *DeviceSet) createBaseImage() error { 968 logrus.Debugf("devmapper: Initializing base device-mapper thin volume") 969 970 // Create initial device 971 info, err := devices.createRegisterDevice("") 972 if err != nil { 973 return err 974 } 975 976 logrus.Debugf("devmapper: Creating filesystem on base device-mapper thin volume") 977 978 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 979 return err 980 } 981 982 if err := devices.createFilesystem(info); err != nil { 983 return err 984 } 985 986 info.Initialized = true 987 if err := devices.saveMetadata(info); err != nil { 988 info.Initialized = false 989 return err 990 } 991 992 if err := devices.saveBaseDeviceUUID(info); err != nil { 993 return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err) 994 } 995 996 return nil 997 } 998 999 // Returns if thin pool device exists or not. If device exists, also makes 1000 // sure it is a thin pool device and not some other type of device. 
func (devices *DeviceSet) thinPoolExists(thinPoolDevice string) (bool, error) {
	logrus.Debugf("devmapper: Checking for existence of the pool %s", thinPoolDevice)

	info, err := devicemapper.GetInfo(thinPoolDevice)
	if err != nil {
		return false, fmt.Errorf("devmapper: GetInfo() on device %s failed: %v", thinPoolDevice, err)
	}

	// Device does not exist.
	if info.Exists == 0 {
		return false, nil
	}

	// The device exists; its target type decides whether it is a thin pool.
	_, _, deviceType, _, err := devicemapper.GetStatus(thinPoolDevice)
	if err != nil {
		return false, fmt.Errorf("devmapper: GetStatus() on device %s failed: %v", thinPoolDevice, err)
	}

	if deviceType != "thin-pool" {
		return false, fmt.Errorf("devmapper: Device %s is not a thin pool", thinPoolDevice)
	}

	return true, nil
}

// checkThinPool verifies that the pool can be taken over: it must report no
// used data blocks and a transaction ID of zero.
func (devices *DeviceSet) checkThinPool() error {
	_, transactionID, dataUsed, _, _, _, err := devices.poolStatus()
	if err != nil {
		return err
	}
	if dataUsed != 0 {
		return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) that already has used data blocks",
			devices.thinPoolDevice)
	}
	if transactionID != 0 {
		return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) with non-zero transaction ID",
			devices.thinPoolDevice)
	}
	return nil
}

// Base image is initialized properly. Either save UUID for first time (for
// upgrade case) or verify UUID.
func (devices *DeviceSet) setupVerifyBaseImageUUIDFS(baseInfo *devInfo) error {
	// If BaseDeviceUUID is empty (upgrade case), save it and return success.
	if devices.BaseDeviceUUID == "" {
		if err := devices.saveBaseDeviceUUID(baseInfo); err != nil {
			return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err)
		}
		return nil
	}

	if err := devices.verifyBaseDeviceUUIDFS(baseInfo); err != nil {
		return fmt.Errorf("devmapper: Base Device UUID and Filesystem verification failed.%v", err)
	}

	return nil
}

// checkGrowBaseDeviceFS grows the base device filesystem to devices.baseFsSize
// when the package-level userBaseSize flag is set and that size is larger than
// the recorded base device size. A smaller requested size is an error; an
// equal size is a no-op.
func (devices *DeviceSet) checkGrowBaseDeviceFS(info *devInfo) error {

	if !userBaseSize {
		return nil
	}

	// NOTE(review): getBaseDeviceSize calls lookupDevice, which is documented
	// as requiring devices.Lock(); the lock is not taken until further down —
	// confirm callers make this safe.
	if devices.baseFsSize < devices.getBaseDeviceSize() {
		return fmt.Errorf("devmapper: Base device size cannot be smaller than %s", units.HumanSize(float64(devices.getBaseDeviceSize())))
	}

	if devices.baseFsSize == devices.getBaseDeviceSize() {
		return nil
	}

	// Lock order: per-device lock first, then the global DeviceSet lock.
	info.lock.Lock()
	defer info.lock.Unlock()

	devices.Lock()
	defer devices.Unlock()

	info.Size = devices.baseFsSize

	if err := devices.saveMetadata(info); err != nil {
		// Try to remove unused device
		// NOTE(review): this drops the base device from the in-memory table
		// and leaves info.Size at the new value although nothing was grown —
		// confirm this is intended.
		delete(devices.Devices, info.Hash)
		return err
	}

	return devices.growFS(info)
}

// growFS activates the device, mounts it on /run/docker/mnt and runs the
// resize tool matching devices.BaseDeviceFilesystem (resize2fs for ext4,
// xfs_growfs for xfs). Deferred cleanup runs in reverse order of
// registration: unmount, then removal of the mount point if this call created
// it, then device deactivation.
func (devices *DeviceSet) growFS(info *devInfo) error {
	if err := devices.activateDeviceIfNeeded(info, false); err != nil {
		return fmt.Errorf("Error activating devmapper device: %s", err)
	}

	defer devices.deactivateDevice(info)

	fsMountPoint := "/run/docker/mnt"
	if _, err := os.Stat(fsMountPoint); os.IsNotExist(err) {
		if err := os.MkdirAll(fsMountPoint, 0700); err != nil {
			return err
		}
		// Only remove the directory when this call created it.
		defer os.RemoveAll(fsMountPoint)
	}

	options := ""
	if devices.BaseDeviceFilesystem == "xfs" {
		// XFS needs nouuid or it can't mount filesystems with the same fs
		options = joinMountOptions(options, "nouuid")
	}
	options = joinMountOptions(options, devices.mountOptions)

	if err := mount.Mount(info.DevName(), fsMountPoint, devices.BaseDeviceFilesystem, options); err != nil {
		return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), fsMountPoint, err)
	}

	defer syscall.Unmount(fsMountPoint, syscall.MNT_DETACH)

	switch devices.BaseDeviceFilesystem {
	case "ext4":
		if out, err := exec.Command("resize2fs", info.DevName()).CombinedOutput(); err != nil {
			return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out))
		}
	case "xfs":
		if out, err := exec.Command("xfs_growfs", info.DevName()).CombinedOutput(); err != nil {
			return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out))
		}
	default:
		return fmt.Errorf("Unsupported filesystem type %s", devices.BaseDeviceFilesystem)
	}
	return nil
}

func (devices *DeviceSet) setupBaseImage() error {
	oldInfo, _ := devices.lookupDeviceWithLock("")

	// base image already exists. If it is initialized properly, do UUID
	// verification and return. Otherwise remove image and set it up
	// fresh.

	if oldInfo != nil {
		if oldInfo.Initialized && !oldInfo.Deleted {
			if err := devices.setupVerifyBaseImageUUIDFS(oldInfo); err != nil {
				return err
			}

			if err := devices.checkGrowBaseDeviceFS(oldInfo); err != nil {
				return err
			}

			return nil
		}

		logrus.Debugf("devmapper: Removing uninitialized base image")
		// If previous base device is in deferred delete state,
		// that needs to be cleaned up first. So don't try
		// deferred deletion.
		if err := devices.DeleteDevice("", true); err != nil {
			return err
		}
	}

	// If we are setting up base image for the first time, make sure
	// thin pool is empty.
1165 if devices.thinPoolDevice != "" && oldInfo == nil { 1166 if err := devices.checkThinPool(); err != nil { 1167 return err 1168 } 1169 } 1170 1171 // Create new base image device 1172 if err := devices.createBaseImage(); err != nil { 1173 return err 1174 } 1175 1176 return nil 1177 } 1178 1179 func setCloseOnExec(name string) { 1180 if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil { 1181 for _, i := range fileInfos { 1182 link, _ := os.Readlink(filepath.Join("/proc/self/fd", i.Name())) 1183 if link == name { 1184 fd, err := strconv.Atoi(i.Name()) 1185 if err == nil { 1186 syscall.CloseOnExec(fd) 1187 } 1188 } 1189 } 1190 } 1191 } 1192 1193 // DMLog implements logging using DevMapperLogger interface. 1194 func (devices *DeviceSet) DMLog(level int, file string, line int, dmError int, message string) { 1195 // By default libdm sends us all the messages including debug ones. 1196 // We need to filter out messages here and figure out which one 1197 // should be printed. 1198 if level > logLevel { 1199 return 1200 } 1201 1202 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 1203 if level <= devicemapper.LogLevelErr { 1204 logrus.Errorf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1205 } else if level <= devicemapper.LogLevelInfo { 1206 logrus.Infof("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1207 } else { 1208 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 1209 logrus.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1210 } 1211 } 1212 1213 func major(device uint64) uint64 { 1214 return (device >> 8) & 0xfff 1215 } 1216 1217 func minor(device uint64) uint64 { 1218 return (device & 0xff) | ((device >> 12) & 0xfff00) 1219 } 1220 1221 // ResizePool increases the size of the pool. 
1222 func (devices *DeviceSet) ResizePool(size int64) error { 1223 dirname := devices.loopbackDir() 1224 datafilename := path.Join(dirname, "data") 1225 if len(devices.dataDevice) > 0 { 1226 datafilename = devices.dataDevice 1227 } 1228 metadatafilename := path.Join(dirname, "metadata") 1229 if len(devices.metadataDevice) > 0 { 1230 metadatafilename = devices.metadataDevice 1231 } 1232 1233 datafile, err := os.OpenFile(datafilename, os.O_RDWR, 0) 1234 if datafile == nil { 1235 return err 1236 } 1237 defer datafile.Close() 1238 1239 fi, err := datafile.Stat() 1240 if fi == nil { 1241 return err 1242 } 1243 1244 if fi.Size() > size { 1245 return fmt.Errorf("devmapper: Can't shrink file") 1246 } 1247 1248 dataloopback := loopback.FindLoopDeviceFor(datafile) 1249 if dataloopback == nil { 1250 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", datafilename) 1251 } 1252 defer dataloopback.Close() 1253 1254 metadatafile, err := os.OpenFile(metadatafilename, os.O_RDWR, 0) 1255 if metadatafile == nil { 1256 return err 1257 } 1258 defer metadatafile.Close() 1259 1260 metadataloopback := loopback.FindLoopDeviceFor(metadatafile) 1261 if metadataloopback == nil { 1262 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", metadatafilename) 1263 } 1264 defer metadataloopback.Close() 1265 1266 // Grow loopback file 1267 if err := datafile.Truncate(size); err != nil { 1268 return fmt.Errorf("devmapper: Unable to grow loopback file: %s", err) 1269 } 1270 1271 // Reload size for loopback device 1272 if err := loopback.SetCapacity(dataloopback); err != nil { 1273 return fmt.Errorf("Unable to update loopback capacity: %s", err) 1274 } 1275 1276 // Suspend the pool 1277 if err := devicemapper.SuspendDevice(devices.getPoolName()); err != nil { 1278 return fmt.Errorf("devmapper: Unable to suspend pool: %s", err) 1279 } 1280 1281 // Reload with the new block sizes 1282 if err := devicemapper.ReloadPool(devices.getPoolName(), dataloopback, 
metadataloopback, devices.thinpBlockSize); err != nil { 1283 return fmt.Errorf("devmapper: Unable to reload pool: %s", err) 1284 } 1285 1286 // Resume the pool 1287 if err := devicemapper.ResumeDevice(devices.getPoolName()); err != nil { 1288 return fmt.Errorf("devmapper: Unable to resume pool: %s", err) 1289 } 1290 1291 return nil 1292 } 1293 1294 func (devices *DeviceSet) loadTransactionMetaData() error { 1295 jsonData, err := ioutil.ReadFile(devices.transactionMetaFile()) 1296 if err != nil { 1297 // There is no active transaction. This will be the case 1298 // during upgrade. 1299 if os.IsNotExist(err) { 1300 devices.OpenTransactionID = devices.TransactionID 1301 return nil 1302 } 1303 return err 1304 } 1305 1306 json.Unmarshal(jsonData, &devices.transaction) 1307 return nil 1308 } 1309 1310 func (devices *DeviceSet) saveTransactionMetaData() error { 1311 jsonData, err := json.Marshal(&devices.transaction) 1312 if err != nil { 1313 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1314 } 1315 1316 return devices.writeMetaFile(jsonData, devices.transactionMetaFile()) 1317 } 1318 1319 func (devices *DeviceSet) removeTransactionMetaData() error { 1320 if err := os.RemoveAll(devices.transactionMetaFile()); err != nil { 1321 return err 1322 } 1323 return nil 1324 } 1325 1326 func (devices *DeviceSet) rollbackTransaction() error { 1327 logrus.Debugf("devmapper: Rolling back open transaction: TransactionID=%d hash=%s device_id=%d", devices.OpenTransactionID, devices.DeviceIDHash, devices.DeviceID) 1328 1329 // A device id might have already been deleted before transaction 1330 // closed. In that case this call will fail. Just leave a message 1331 // in case of failure. 
1332 if err := devicemapper.DeleteDevice(devices.getPoolDevName(), devices.DeviceID); err != nil { 1333 logrus.Errorf("devmapper: Unable to delete device: %s", err) 1334 } 1335 1336 dinfo := &devInfo{Hash: devices.DeviceIDHash} 1337 if err := devices.removeMetadata(dinfo); err != nil { 1338 logrus.Errorf("devmapper: Unable to remove metadata: %s", err) 1339 } else { 1340 devices.markDeviceIDFree(devices.DeviceID) 1341 } 1342 1343 if err := devices.removeTransactionMetaData(); err != nil { 1344 logrus.Errorf("devmapper: Unable to remove transaction meta file %s: %s", devices.transactionMetaFile(), err) 1345 } 1346 1347 return nil 1348 } 1349 1350 func (devices *DeviceSet) processPendingTransaction() error { 1351 if err := devices.loadTransactionMetaData(); err != nil { 1352 return err 1353 } 1354 1355 // If there was open transaction but pool transaction ID is same 1356 // as open transaction ID, nothing to roll back. 1357 if devices.TransactionID == devices.OpenTransactionID { 1358 return nil 1359 } 1360 1361 // If open transaction ID is less than pool transaction ID, something 1362 // is wrong. Bail out. 1363 if devices.OpenTransactionID < devices.TransactionID { 1364 logrus.Errorf("devmapper: Open Transaction id %d is less than pool transaction id %d", devices.OpenTransactionID, devices.TransactionID) 1365 return nil 1366 } 1367 1368 // Pool transaction ID is not same as open transaction. There is 1369 // a transaction which was not completed. 1370 if err := devices.rollbackTransaction(); err != nil { 1371 return fmt.Errorf("devmapper: Rolling back open transaction failed: %s", err) 1372 } 1373 1374 devices.OpenTransactionID = devices.TransactionID 1375 return nil 1376 } 1377 1378 func (devices *DeviceSet) loadDeviceSetMetaData() error { 1379 jsonData, err := ioutil.ReadFile(devices.deviceSetMetaFile()) 1380 if err != nil { 1381 // For backward compatibility return success if file does 1382 // not exist. 
1383 if os.IsNotExist(err) { 1384 return nil 1385 } 1386 return err 1387 } 1388 1389 return json.Unmarshal(jsonData, devices) 1390 } 1391 1392 func (devices *DeviceSet) saveDeviceSetMetaData() error { 1393 jsonData, err := json.Marshal(devices) 1394 if err != nil { 1395 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1396 } 1397 1398 return devices.writeMetaFile(jsonData, devices.deviceSetMetaFile()) 1399 } 1400 1401 func (devices *DeviceSet) openTransaction(hash string, DeviceID int) error { 1402 devices.allocateTransactionID() 1403 devices.DeviceIDHash = hash 1404 devices.DeviceID = DeviceID 1405 if err := devices.saveTransactionMetaData(); err != nil { 1406 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1407 } 1408 return nil 1409 } 1410 1411 func (devices *DeviceSet) refreshTransaction(DeviceID int) error { 1412 devices.DeviceID = DeviceID 1413 if err := devices.saveTransactionMetaData(); err != nil { 1414 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1415 } 1416 return nil 1417 } 1418 1419 func (devices *DeviceSet) closeTransaction() error { 1420 if err := devices.updatePoolTransactionID(); err != nil { 1421 logrus.Debugf("devmapper: Failed to close Transaction") 1422 return err 1423 } 1424 return nil 1425 } 1426 1427 func determineDriverCapabilities(version string) error { 1428 /* 1429 * Driver version 4.27.0 and greater support deferred activation 1430 * feature. 
1431 */ 1432 1433 logrus.Debugf("devicemapper: driver version is %s", version) 1434 1435 versionSplit := strings.Split(version, ".") 1436 major, err := strconv.Atoi(versionSplit[0]) 1437 if err != nil { 1438 return graphdriver.ErrNotSupported 1439 } 1440 1441 if major > 4 { 1442 driverDeferredRemovalSupport = true 1443 return nil 1444 } 1445 1446 if major < 4 { 1447 return nil 1448 } 1449 1450 minor, err := strconv.Atoi(versionSplit[1]) 1451 if err != nil { 1452 return graphdriver.ErrNotSupported 1453 } 1454 1455 /* 1456 * If major is 4 and minor is 27, then there is no need to 1457 * check for patch level as it can not be less than 0. 1458 */ 1459 if minor >= 27 { 1460 driverDeferredRemovalSupport = true 1461 return nil 1462 } 1463 1464 return nil 1465 } 1466 1467 // Determine the major and minor number of loopback device 1468 func getDeviceMajorMinor(file *os.File) (uint64, uint64, error) { 1469 stat, err := file.Stat() 1470 if err != nil { 1471 return 0, 0, err 1472 } 1473 1474 dev := stat.Sys().(*syscall.Stat_t).Rdev 1475 majorNum := major(dev) 1476 minorNum := minor(dev) 1477 1478 logrus.Debugf("devmapper: Major:Minor for device: %s is:%v:%v", file.Name(), majorNum, minorNum) 1479 return majorNum, minorNum, nil 1480 } 1481 1482 // Given a file which is backing file of a loop back device, find the 1483 // loopback device name and its major/minor number. 
1484 func getLoopFileDeviceMajMin(filename string) (string, uint64, uint64, error) { 1485 file, err := os.Open(filename) 1486 if err != nil { 1487 logrus.Debugf("devmapper: Failed to open file %s", filename) 1488 return "", 0, 0, err 1489 } 1490 1491 defer file.Close() 1492 loopbackDevice := loopback.FindLoopDeviceFor(file) 1493 if loopbackDevice == nil { 1494 return "", 0, 0, fmt.Errorf("devmapper: Unable to find loopback mount for: %s", filename) 1495 } 1496 defer loopbackDevice.Close() 1497 1498 Major, Minor, err := getDeviceMajorMinor(loopbackDevice) 1499 if err != nil { 1500 return "", 0, 0, err 1501 } 1502 return loopbackDevice.Name(), Major, Minor, nil 1503 } 1504 1505 // Get the major/minor numbers of thin pool data and metadata devices 1506 func (devices *DeviceSet) getThinPoolDataMetaMajMin() (uint64, uint64, uint64, uint64, error) { 1507 var params, poolDataMajMin, poolMetadataMajMin string 1508 1509 _, _, _, params, err := devicemapper.GetTable(devices.getPoolName()) 1510 if err != nil { 1511 return 0, 0, 0, 0, err 1512 } 1513 1514 if _, err = fmt.Sscanf(params, "%s %s", &poolMetadataMajMin, &poolDataMajMin); err != nil { 1515 return 0, 0, 0, 0, err 1516 } 1517 1518 logrus.Debugf("devmapper: poolDataMajMin=%s poolMetaMajMin=%s\n", poolDataMajMin, poolMetadataMajMin) 1519 1520 poolDataMajMinorSplit := strings.Split(poolDataMajMin, ":") 1521 poolDataMajor, err := strconv.ParseUint(poolDataMajMinorSplit[0], 10, 32) 1522 if err != nil { 1523 return 0, 0, 0, 0, err 1524 } 1525 1526 poolDataMinor, err := strconv.ParseUint(poolDataMajMinorSplit[1], 10, 32) 1527 if err != nil { 1528 return 0, 0, 0, 0, err 1529 } 1530 1531 poolMetadataMajMinorSplit := strings.Split(poolMetadataMajMin, ":") 1532 poolMetadataMajor, err := strconv.ParseUint(poolMetadataMajMinorSplit[0], 10, 32) 1533 if err != nil { 1534 return 0, 0, 0, 0, err 1535 } 1536 1537 poolMetadataMinor, err := strconv.ParseUint(poolMetadataMajMinorSplit[1], 10, 32) 1538 if err != nil { 1539 return 0, 0, 0, 
0, err 1540 } 1541 1542 return poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, nil 1543 } 1544 1545 func (devices *DeviceSet) loadThinPoolLoopBackInfo() error { 1546 poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, err := devices.getThinPoolDataMetaMajMin() 1547 if err != nil { 1548 return err 1549 } 1550 1551 dirname := devices.loopbackDir() 1552 1553 // data device has not been passed in. So there should be a data file 1554 // which is being mounted as loop device. 1555 if devices.dataDevice == "" { 1556 datafilename := path.Join(dirname, "data") 1557 dataLoopDevice, dataMajor, dataMinor, err := getLoopFileDeviceMajMin(datafilename) 1558 if err != nil { 1559 return err 1560 } 1561 1562 // Compare the two 1563 if poolDataMajor == dataMajor && poolDataMinor == dataMinor { 1564 devices.dataDevice = dataLoopDevice 1565 devices.dataLoopFile = datafilename 1566 } 1567 1568 } 1569 1570 // metadata device has not been passed in. So there should be a 1571 // metadata file which is being mounted as loop device. 
1572 if devices.metadataDevice == "" { 1573 metadatafilename := path.Join(dirname, "metadata") 1574 metadataLoopDevice, metadataMajor, metadataMinor, err := getLoopFileDeviceMajMin(metadatafilename) 1575 if err != nil { 1576 return err 1577 } 1578 if poolMetadataMajor == metadataMajor && poolMetadataMinor == metadataMinor { 1579 devices.metadataDevice = metadataLoopDevice 1580 devices.metadataLoopFile = metadatafilename 1581 } 1582 } 1583 1584 return nil 1585 } 1586 1587 func (devices *DeviceSet) initDevmapper(doInit bool) error { 1588 // give ourselves to libdm as a log handler 1589 devicemapper.LogInit(devices) 1590 1591 version, err := devicemapper.GetDriverVersion() 1592 if err != nil { 1593 // Can't even get driver version, assume not supported 1594 return graphdriver.ErrNotSupported 1595 } 1596 1597 if err := determineDriverCapabilities(version); err != nil { 1598 return graphdriver.ErrNotSupported 1599 } 1600 1601 // If user asked for deferred removal then check both libdm library 1602 // and kernel driver support deferred removal otherwise error out. 1603 if enableDeferredRemoval { 1604 if !driverDeferredRemovalSupport { 1605 return fmt.Errorf("devmapper: Deferred removal can not be enabled as kernel does not support it") 1606 } 1607 if !devicemapper.LibraryDeferredRemovalSupport { 1608 return fmt.Errorf("devmapper: Deferred removal can not be enabled as libdm does not support it") 1609 } 1610 logrus.Debugf("devmapper: Deferred removal support enabled.") 1611 devices.deferredRemove = true 1612 } 1613 1614 if enableDeferredDeletion { 1615 if !devices.deferredRemove { 1616 return fmt.Errorf("devmapper: Deferred deletion can not be enabled as deferred removal is not enabled. 
Enable deferred removal using --storage-opt dm.use_deferred_removal=true parameter") 1617 } 1618 logrus.Debugf("devmapper: Deferred deletion support enabled.") 1619 devices.deferredDelete = true 1620 } 1621 1622 // https://github.com/docker/docker/issues/4036 1623 if supported := devicemapper.UdevSetSyncSupport(true); !supported { 1624 logrus.Warn("devmapper: Udev sync is not supported. This will lead to unexpected behavior, data loss and errors. For more information, see https://docs.docker.com/reference/commandline/daemon/#daemon-storage-driver-option") 1625 } 1626 1627 //create the root dir of the devmapper driver ownership to match this 1628 //daemon's remapped root uid/gid so containers can start properly 1629 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 1630 if err != nil { 1631 return err 1632 } 1633 if err := idtools.MkdirAs(devices.root, 0700, uid, gid); err != nil && !os.IsExist(err) { 1634 return err 1635 } 1636 if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil && !os.IsExist(err) { 1637 return err 1638 } 1639 1640 // Set the device prefix from the device id and inode of the docker root dir 1641 1642 st, err := os.Stat(devices.root) 1643 if err != nil { 1644 return fmt.Errorf("devmapper: Error looking up dir %s: %s", devices.root, err) 1645 } 1646 sysSt := st.Sys().(*syscall.Stat_t) 1647 // "reg-" stands for "regular file". 1648 // In the future we might use "dev-" for "device file", etc. 1649 // docker-maj,min[-inode] stands for: 1650 // - Managed by docker 1651 // - The target of this device is at major <maj> and minor <min> 1652 // - If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself. 
1653 devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino) 1654 logrus.Debugf("devmapper: Generated prefix: %s", devices.devicePrefix) 1655 1656 // Check for the existence of the thin-pool device 1657 poolExists, err := devices.thinPoolExists(devices.getPoolName()) 1658 if err != nil { 1659 return err 1660 } 1661 1662 // It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files 1663 // that are not Close-on-exec, 1664 // so we add this badhack to make sure it closes itself 1665 setCloseOnExec("/dev/mapper/control") 1666 1667 // Make sure the sparse images exist in <root>/devicemapper/data and 1668 // <root>/devicemapper/metadata 1669 1670 createdLoopback := false 1671 1672 // If the pool doesn't exist, create it 1673 if !poolExists && devices.thinPoolDevice == "" { 1674 logrus.Debugf("devmapper: Pool doesn't exist. Creating it.") 1675 1676 var ( 1677 dataFile *os.File 1678 metadataFile *os.File 1679 ) 1680 1681 if devices.dataDevice == "" { 1682 // Make sure the sparse images exist in <root>/devicemapper/data 1683 1684 hasData := devices.hasImage("data") 1685 1686 if !doInit && !hasData { 1687 return errors.New("Loopback data file not found") 1688 } 1689 1690 if !hasData { 1691 createdLoopback = true 1692 } 1693 1694 data, err := devices.ensureImage("data", devices.dataLoopbackSize) 1695 if err != nil { 1696 logrus.Debugf("devmapper: Error device ensureImage (data): %s", err) 1697 return err 1698 } 1699 1700 dataFile, err = loopback.AttachLoopDevice(data) 1701 if err != nil { 1702 return err 1703 } 1704 devices.dataLoopFile = data 1705 devices.dataDevice = dataFile.Name() 1706 } else { 1707 dataFile, err = os.OpenFile(devices.dataDevice, os.O_RDWR, 0600) 1708 if err != nil { 1709 return err 1710 } 1711 } 1712 defer dataFile.Close() 1713 1714 if devices.metadataDevice == "" { 1715 // Make sure the sparse images exist in <root>/devicemapper/metadata 1716 1717 hasMetadata := 
devices.hasImage("metadata") 1718 1719 if !doInit && !hasMetadata { 1720 return errors.New("Loopback metadata file not found") 1721 } 1722 1723 if !hasMetadata { 1724 createdLoopback = true 1725 } 1726 1727 metadata, err := devices.ensureImage("metadata", devices.metaDataLoopbackSize) 1728 if err != nil { 1729 logrus.Debugf("devmapper: Error device ensureImage (metadata): %s", err) 1730 return err 1731 } 1732 1733 metadataFile, err = loopback.AttachLoopDevice(metadata) 1734 if err != nil { 1735 return err 1736 } 1737 devices.metadataLoopFile = metadata 1738 devices.metadataDevice = metadataFile.Name() 1739 } else { 1740 metadataFile, err = os.OpenFile(devices.metadataDevice, os.O_RDWR, 0600) 1741 if err != nil { 1742 return err 1743 } 1744 } 1745 defer metadataFile.Close() 1746 1747 if err := devicemapper.CreatePool(devices.getPoolName(), dataFile, metadataFile, devices.thinpBlockSize); err != nil { 1748 return err 1749 } 1750 } 1751 1752 // Pool already exists and caller did not pass us a pool. That means 1753 // we probably created pool earlier and could not remove it as some 1754 // containers were still using it. Detect some of the properties of 1755 // pool, like is it using loop devices. 1756 if poolExists && devices.thinPoolDevice == "" { 1757 if err := devices.loadThinPoolLoopBackInfo(); err != nil { 1758 logrus.Debugf("devmapper: Failed to load thin pool loopback device information:%v", err) 1759 return err 1760 } 1761 } 1762 1763 // If we didn't just create the data or metadata image, we need to 1764 // load the transaction id and migrate old metadata 1765 if !createdLoopback { 1766 if err := devices.initMetaData(); err != nil { 1767 return err 1768 } 1769 } 1770 1771 if devices.thinPoolDevice == "" { 1772 if devices.metadataLoopFile != "" || devices.dataLoopFile != "" { 1773 logrus.Warnf("devmapper: Usage of loopback devices is strongly discouraged for production use. 
Please use `--storage-opt dm.thinpooldev` or use `man docker` to refer to dm.thinpooldev section.") 1774 } 1775 } 1776 1777 // Right now this loads only NextDeviceID. If there is more metadata 1778 // down the line, we might have to move it earlier. 1779 if err := devices.loadDeviceSetMetaData(); err != nil { 1780 return err 1781 } 1782 1783 // Setup the base image 1784 if doInit { 1785 if err := devices.setupBaseImage(); err != nil { 1786 logrus.Debugf("devmapper: Error device setupBaseImage: %s", err) 1787 return err 1788 } 1789 } 1790 1791 return nil 1792 } 1793 1794 // AddDevice adds a device and registers in the hash. 1795 func (devices *DeviceSet) AddDevice(hash, baseHash string) error { 1796 logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s)", hash, baseHash) 1797 defer logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s) END", hash, baseHash) 1798 1799 // If a deleted device exists, return error. 1800 baseInfo, err := devices.lookupDeviceWithLock(baseHash) 1801 if err != nil { 1802 return err 1803 } 1804 1805 if baseInfo.Deleted { 1806 return fmt.Errorf("devmapper: Base device %v has been marked for deferred deletion", baseInfo.Hash) 1807 } 1808 1809 baseInfo.lock.Lock() 1810 defer baseInfo.lock.Unlock() 1811 1812 devices.Lock() 1813 defer devices.Unlock() 1814 1815 // Also include deleted devices in case hash of new device is 1816 // same as one of the deleted devices. 1817 if info, _ := devices.lookupDevice(hash); info != nil { 1818 return fmt.Errorf("devmapper: device %s already exists. Deleted=%v", hash, info.Deleted) 1819 } 1820 1821 if err := devices.createRegisterSnapDevice(hash, baseInfo); err != nil { 1822 return err 1823 } 1824 1825 return nil 1826 } 1827 1828 func (devices *DeviceSet) markForDeferredDeletion(info *devInfo) error { 1829 // If device is already in deleted state, there is nothing to be done. 
1830 if info.Deleted { 1831 return nil 1832 } 1833 1834 logrus.Debugf("devmapper: Marking device %s for deferred deletion.", info.Hash) 1835 1836 info.Deleted = true 1837 1838 // save device metadata to reflect deleted state. 1839 if err := devices.saveMetadata(info); err != nil { 1840 info.Deleted = false 1841 return err 1842 } 1843 1844 devices.nrDeletedDevices++ 1845 return nil 1846 } 1847 1848 // Should be called with devices.Lock() held. 1849 func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) error { 1850 if err := devices.openTransaction(info.Hash, info.DeviceID); err != nil { 1851 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceId = %d", "", info.DeviceID) 1852 return err 1853 } 1854 1855 defer devices.closeTransaction() 1856 1857 err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID) 1858 if err != nil { 1859 // If syncDelete is true, we want to return error. If deferred 1860 // deletion is not enabled, we return an error. If error is 1861 // something other then EBUSY, return an error. 1862 if syncDelete || !devices.deferredDelete || err != devicemapper.ErrBusy { 1863 logrus.Debugf("devmapper: Error deleting device: %s", err) 1864 return err 1865 } 1866 } 1867 1868 if err == nil { 1869 if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil { 1870 return err 1871 } 1872 // If device was already in deferred delete state that means 1873 // deletion was being tried again later. Reduce the deleted 1874 // device count. 1875 if info.Deleted { 1876 devices.nrDeletedDevices-- 1877 } 1878 devices.markDeviceIDFree(info.DeviceID) 1879 } else { 1880 if err := devices.markForDeferredDeletion(info); err != nil { 1881 return err 1882 } 1883 } 1884 1885 return nil 1886 } 1887 1888 // Issue discard only if device open count is zero. 1889 func (devices *DeviceSet) issueDiscard(info *devInfo) error { 1890 logrus.Debugf("devmapper: issueDiscard(device: %s). 
START", info.Hash) 1891 defer logrus.Debugf("devmapper: issueDiscard(device: %s). END", info.Hash) 1892 // This is a workaround for the kernel not discarding block so 1893 // on the thin pool when we remove a thinp device, so we do it 1894 // manually. 1895 // Even if device is deferred deleted, activate it and issue 1896 // discards. 1897 if err := devices.activateDeviceIfNeeded(info, true); err != nil { 1898 return err 1899 } 1900 1901 devinfo, err := devicemapper.GetInfo(info.Name()) 1902 if err != nil { 1903 return err 1904 } 1905 1906 if devinfo.OpenCount != 0 { 1907 logrus.Debugf("devmapper: Device: %s is in use. OpenCount=%d. Not issuing discards.", info.Hash, devinfo.OpenCount) 1908 return nil 1909 } 1910 1911 if err := devicemapper.BlockDeviceDiscard(info.DevName()); err != nil { 1912 logrus.Debugf("devmapper: Error discarding block on device: %s (ignoring)", err) 1913 } 1914 return nil 1915 } 1916 1917 // Should be called with devices.Lock() held. 1918 func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error { 1919 if devices.doBlkDiscard { 1920 devices.issueDiscard(info) 1921 } 1922 1923 // Try to deactivate device in case it is active. 1924 if err := devices.deactivateDevice(info); err != nil { 1925 logrus.Debugf("devmapper: Error deactivating device: %s", err) 1926 return err 1927 } 1928 1929 if err := devices.deleteTransaction(info, syncDelete); err != nil { 1930 return err 1931 } 1932 1933 return nil 1934 } 1935 1936 // DeleteDevice will return success if device has been marked for deferred 1937 // removal. If one wants to override that and want DeleteDevice() to fail if 1938 // device was busy and could not be deleted, set syncDelete=true. 
1939 func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error { 1940 logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) START", hash, syncDelete) 1941 defer logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) END", hash, syncDelete) 1942 info, err := devices.lookupDeviceWithLock(hash) 1943 if err != nil { 1944 return err 1945 } 1946 1947 info.lock.Lock() 1948 defer info.lock.Unlock() 1949 1950 devices.Lock() 1951 defer devices.Unlock() 1952 1953 // If mountcount is not zero, that means devices is still in use 1954 // or has not been Put() properly. Fail device deletion. 1955 1956 if info.mountCount != 0 { 1957 return fmt.Errorf("devmapper: Can't delete device %v as it is still mounted. mntCount=%v", info.Hash, info.mountCount) 1958 } 1959 1960 return devices.deleteDevice(info, syncDelete) 1961 } 1962 1963 func (devices *DeviceSet) deactivatePool() error { 1964 logrus.Debugf("devmapper: deactivatePool()") 1965 defer logrus.Debugf("devmapper: deactivatePool END") 1966 devname := devices.getPoolDevName() 1967 1968 devinfo, err := devicemapper.GetInfo(devname) 1969 if err != nil { 1970 return err 1971 } 1972 1973 if devinfo.Exists == 0 { 1974 return nil 1975 } 1976 if err := devicemapper.RemoveDevice(devname); err != nil { 1977 return err 1978 } 1979 1980 if d, err := devicemapper.GetDeps(devname); err == nil { 1981 logrus.Warnf("devmapper: device %s still has %d active dependents", devname, d.Count) 1982 } 1983 1984 return nil 1985 } 1986 1987 func (devices *DeviceSet) deactivateDevice(info *devInfo) error { 1988 logrus.Debugf("devmapper: deactivateDevice(%s)", info.Hash) 1989 defer logrus.Debugf("devmapper: deactivateDevice END(%s)", info.Hash) 1990 1991 devinfo, err := devicemapper.GetInfo(info.Name()) 1992 if err != nil { 1993 return err 1994 } 1995 1996 if devinfo.Exists == 0 { 1997 return nil 1998 } 1999 2000 if devices.deferredRemove { 2001 if err := devicemapper.RemoveDeviceDeferred(info.Name()); err != nil { 2002 return 
err 2003 } 2004 } else { 2005 if err := devices.removeDevice(info.Name()); err != nil { 2006 return err 2007 } 2008 } 2009 return nil 2010 } 2011 2012 // Issues the underlying dm remove operation. 2013 func (devices *DeviceSet) removeDevice(devname string) error { 2014 var err error 2015 2016 logrus.Debugf("devmapper: removeDevice START(%s)", devname) 2017 defer logrus.Debugf("devmapper: removeDevice END(%s)", devname) 2018 2019 for i := 0; i < 200; i++ { 2020 err = devicemapper.RemoveDevice(devname) 2021 if err == nil { 2022 break 2023 } 2024 if err != devicemapper.ErrBusy { 2025 return err 2026 } 2027 2028 // If we see EBUSY it may be a transient error, 2029 // sleep a bit a retry a few times. 2030 devices.Unlock() 2031 time.Sleep(100 * time.Millisecond) 2032 devices.Lock() 2033 } 2034 2035 return err 2036 } 2037 2038 func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error { 2039 if !devices.deferredRemove { 2040 return nil 2041 } 2042 2043 logrus.Debugf("devmapper: cancelDeferredRemoval START(%s)", info.Name()) 2044 defer logrus.Debugf("devmapper: cancelDeferredRemoval END(%s)", info.Name()) 2045 2046 devinfo, err := devicemapper.GetInfoWithDeferred(info.Name()) 2047 2048 if devinfo != nil && devinfo.DeferredRemove == 0 { 2049 return nil 2050 } 2051 2052 // Cancel deferred remove 2053 for i := 0; i < 100; i++ { 2054 err = devicemapper.CancelDeferredRemove(info.Name()) 2055 if err == nil { 2056 break 2057 } 2058 2059 if err == devicemapper.ErrEnxio { 2060 // Device is probably already gone. Return success. 2061 return nil 2062 } 2063 2064 if err != devicemapper.ErrBusy { 2065 return err 2066 } 2067 2068 // If we see EBUSY it may be a transient error, 2069 // sleep a bit a retry a few times. 2070 devices.Unlock() 2071 time.Sleep(100 * time.Millisecond) 2072 devices.Lock() 2073 } 2074 return err 2075 } 2076 2077 // Shutdown shuts down the device by unmounting the root. 
2078 func (devices *DeviceSet) Shutdown() error { 2079 logrus.Debugf("devmapper: [deviceset %s] Shutdown()", devices.devicePrefix) 2080 logrus.Debugf("devmapper: Shutting down DeviceSet: %s", devices.root) 2081 defer logrus.Debugf("devmapper: [deviceset %s] Shutdown() END", devices.devicePrefix) 2082 2083 var devs []*devInfo 2084 2085 // Stop deletion worker. This should start delivering new events to 2086 // ticker channel. That means no new instance of cleanupDeletedDevice() 2087 // will run after this call. If one instance is already running at 2088 // the time of the call, it must be holding devices.Lock() and 2089 // we will block on this lock till cleanup function exits. 2090 devices.deletionWorkerTicker.Stop() 2091 2092 devices.Lock() 2093 // Save DeviceSet Metadata first. Docker kills all threads if they 2094 // don't finish in certain time. It is possible that Shutdown() 2095 // routine does not finish in time as we loop trying to deactivate 2096 // some devices while these are busy. In that case shutdown() routine 2097 // will be killed and we will not get a chance to save deviceset 2098 // metadata. Hence save this early before trying to deactivate devices. 2099 devices.saveDeviceSetMetaData() 2100 2101 for _, info := range devices.Devices { 2102 devs = append(devs, info) 2103 } 2104 devices.Unlock() 2105 2106 for _, info := range devs { 2107 info.lock.Lock() 2108 if info.mountCount > 0 { 2109 // We use MNT_DETACH here in case it is still busy in some running 2110 // container. This means it'll go away from the global scope directly, 2111 // and the device will be released when that container dies. 
2112 if err := syscall.Unmount(info.mountPath, syscall.MNT_DETACH); err != nil { 2113 logrus.Debugf("devmapper: Shutdown unmounting %s, error: %s", info.mountPath, err) 2114 } 2115 2116 devices.Lock() 2117 if err := devices.deactivateDevice(info); err != nil { 2118 logrus.Debugf("devmapper: Shutdown deactivate %s , error: %s", info.Hash, err) 2119 } 2120 devices.Unlock() 2121 } 2122 info.lock.Unlock() 2123 } 2124 2125 info, _ := devices.lookupDeviceWithLock("") 2126 if info != nil { 2127 info.lock.Lock() 2128 devices.Lock() 2129 if err := devices.deactivateDevice(info); err != nil { 2130 logrus.Debugf("devmapper: Shutdown deactivate base , error: %s", err) 2131 } 2132 devices.Unlock() 2133 info.lock.Unlock() 2134 } 2135 2136 devices.Lock() 2137 if devices.thinPoolDevice == "" { 2138 if err := devices.deactivatePool(); err != nil { 2139 logrus.Debugf("devmapper: Shutdown deactivate pool , error: %s", err) 2140 } 2141 } 2142 devices.Unlock() 2143 2144 return nil 2145 } 2146 2147 // MountDevice mounts the device if not already mounted. 
2148 func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error { 2149 info, err := devices.lookupDeviceWithLock(hash) 2150 if err != nil { 2151 return err 2152 } 2153 2154 if info.Deleted { 2155 return fmt.Errorf("devmapper: Can't mount device %v as it has been marked for deferred deletion", info.Hash) 2156 } 2157 2158 info.lock.Lock() 2159 defer info.lock.Unlock() 2160 2161 devices.Lock() 2162 defer devices.Unlock() 2163 2164 if info.mountCount > 0 { 2165 if path != info.mountPath { 2166 return fmt.Errorf("devmapper: Trying to mount devmapper device in multiple places (%s, %s)", info.mountPath, path) 2167 } 2168 2169 info.mountCount++ 2170 return nil 2171 } 2172 2173 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2174 return fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2175 } 2176 2177 fstype, err := ProbeFsType(info.DevName()) 2178 if err != nil { 2179 return err 2180 } 2181 2182 options := "" 2183 2184 if fstype == "xfs" { 2185 // XFS needs nouuid or it can't mount filesystems with the same fs 2186 options = joinMountOptions(options, "nouuid") 2187 } 2188 2189 options = joinMountOptions(options, devices.mountOptions) 2190 options = joinMountOptions(options, label.FormatMountLabel("", mountLabel)) 2191 2192 if err := mount.Mount(info.DevName(), path, fstype, options); err != nil { 2193 return fmt.Errorf("devmapper: Error mounting '%s' on '%s': %s", info.DevName(), path, err) 2194 } 2195 2196 info.mountCount = 1 2197 info.mountPath = path 2198 2199 return nil 2200 } 2201 2202 // UnmountDevice unmounts the device and removes it from hash. 
2203 func (devices *DeviceSet) UnmountDevice(hash, mountPath string) error { 2204 logrus.Debugf("devmapper: UnmountDevice(hash=%s)", hash) 2205 defer logrus.Debugf("devmapper: UnmountDevice(hash=%s) END", hash) 2206 2207 info, err := devices.lookupDeviceWithLock(hash) 2208 if err != nil { 2209 return err 2210 } 2211 2212 info.lock.Lock() 2213 defer info.lock.Unlock() 2214 2215 devices.Lock() 2216 defer devices.Unlock() 2217 2218 // If there are running containers when daemon crashes, during daemon 2219 // restarting, it will kill running containers and will finally call 2220 // Put() without calling Get(). So info.MountCount may become negative. 2221 // if info.mountCount goes negative, we do the unmount and assign 2222 // it to 0. 2223 2224 info.mountCount-- 2225 if info.mountCount > 0 { 2226 return nil 2227 } else if info.mountCount < 0 { 2228 logrus.Warnf("devmapper: Mount count of device went negative. Put() called without matching Get(). Resetting count to 0") 2229 info.mountCount = 0 2230 } 2231 2232 logrus.Debugf("devmapper: Unmount(%s)", mountPath) 2233 if err := syscall.Unmount(mountPath, syscall.MNT_DETACH); err != nil { 2234 return err 2235 } 2236 logrus.Debugf("devmapper: Unmount done") 2237 2238 if err := devices.deactivateDevice(info); err != nil { 2239 return err 2240 } 2241 2242 info.mountPath = "" 2243 2244 return nil 2245 } 2246 2247 // HasDevice returns true if the device metadata exists. 2248 func (devices *DeviceSet) HasDevice(hash string) bool { 2249 info, _ := devices.lookupDeviceWithLock(hash) 2250 return info != nil 2251 } 2252 2253 // List returns a list of device ids. 
2254 func (devices *DeviceSet) List() []string { 2255 devices.Lock() 2256 defer devices.Unlock() 2257 2258 ids := make([]string, len(devices.Devices)) 2259 i := 0 2260 for k := range devices.Devices { 2261 ids[i] = k 2262 i++ 2263 } 2264 return ids 2265 } 2266 2267 func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) { 2268 var params string 2269 _, sizeInSectors, _, params, err = devicemapper.GetStatus(devName) 2270 if err != nil { 2271 return 2272 } 2273 if _, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector); err == nil { 2274 return 2275 } 2276 return 2277 } 2278 2279 // GetDeviceStatus provides size, mapped sectors 2280 func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) { 2281 info, err := devices.lookupDeviceWithLock(hash) 2282 if err != nil { 2283 return nil, err 2284 } 2285 2286 info.lock.Lock() 2287 defer info.lock.Unlock() 2288 2289 devices.Lock() 2290 defer devices.Unlock() 2291 2292 status := &DevStatus{ 2293 DeviceID: info.DeviceID, 2294 Size: info.Size, 2295 TransactionID: info.TransactionID, 2296 } 2297 2298 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2299 return nil, fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2300 } 2301 2302 sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName()) 2303 2304 if err != nil { 2305 return nil, err 2306 } 2307 2308 status.SizeInSectors = sizeInSectors 2309 status.MappedSectors = mappedSectors 2310 status.HighestMappedSector = highestMappedSector 2311 2312 return status, nil 2313 } 2314 2315 func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionID, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) { 2316 var params string 2317 if _, totalSizeInSectors, _, params, err = devicemapper.GetStatus(devices.getPoolName()); err == nil { 2318 _, err = fmt.Sscanf(params, "%d %d/%d 
%d/%d", &transactionID, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal) 2319 } 2320 return 2321 } 2322 2323 // DataDevicePath returns the path to the data storage for this deviceset, 2324 // regardless of loopback or block device 2325 func (devices *DeviceSet) DataDevicePath() string { 2326 return devices.dataDevice 2327 } 2328 2329 // MetadataDevicePath returns the path to the metadata storage for this deviceset, 2330 // regardless of loopback or block device 2331 func (devices *DeviceSet) MetadataDevicePath() string { 2332 return devices.metadataDevice 2333 } 2334 2335 func (devices *DeviceSet) getUnderlyingAvailableSpace(loopFile string) (uint64, error) { 2336 buf := new(syscall.Statfs_t) 2337 if err := syscall.Statfs(loopFile, buf); err != nil { 2338 logrus.Warnf("devmapper: Couldn't stat loopfile filesystem %v: %v", loopFile, err) 2339 return 0, err 2340 } 2341 return buf.Bfree * uint64(buf.Bsize), nil 2342 } 2343 2344 func (devices *DeviceSet) isRealFile(loopFile string) (bool, error) { 2345 if loopFile != "" { 2346 fi, err := os.Stat(loopFile) 2347 if err != nil { 2348 logrus.Warnf("devmapper: Couldn't stat loopfile %v: %v", loopFile, err) 2349 return false, err 2350 } 2351 return fi.Mode().IsRegular(), nil 2352 } 2353 return false, nil 2354 } 2355 2356 // Status returns the current status of this deviceset 2357 func (devices *DeviceSet) Status() *Status { 2358 devices.Lock() 2359 defer devices.Unlock() 2360 2361 status := &Status{} 2362 2363 status.PoolName = devices.getPoolName() 2364 status.DataFile = devices.DataDevicePath() 2365 status.DataLoopback = devices.dataLoopFile 2366 status.MetadataFile = devices.MetadataDevicePath() 2367 status.MetadataLoopback = devices.metadataLoopFile 2368 status.UdevSyncSupported = devicemapper.UdevSyncSupported() 2369 status.DeferredRemoveEnabled = devices.deferredRemove 2370 status.DeferredDeleteEnabled = devices.deferredDelete 2371 status.DeferredDeletedDeviceCount = devices.nrDeletedDevices 2372 
status.BaseDeviceSize = devices.getBaseDeviceSize() 2373 status.BaseDeviceFS = devices.getBaseDeviceFS() 2374 2375 totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 2376 if err == nil { 2377 // Convert from blocks to bytes 2378 blockSizeInSectors := totalSizeInSectors / dataTotal 2379 2380 status.Data.Used = dataUsed * blockSizeInSectors * 512 2381 status.Data.Total = dataTotal * blockSizeInSectors * 512 2382 status.Data.Available = status.Data.Total - status.Data.Used 2383 2384 // metadata blocks are always 4k 2385 status.Metadata.Used = metadataUsed * 4096 2386 status.Metadata.Total = metadataTotal * 4096 2387 status.Metadata.Available = status.Metadata.Total - status.Metadata.Used 2388 2389 status.SectorSize = blockSizeInSectors * 512 2390 2391 if check, _ := devices.isRealFile(devices.dataLoopFile); check { 2392 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.dataLoopFile) 2393 if err == nil && actualSpace < status.Data.Available { 2394 status.Data.Available = actualSpace 2395 } 2396 } 2397 2398 if check, _ := devices.isRealFile(devices.metadataLoopFile); check { 2399 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.metadataLoopFile) 2400 if err == nil && actualSpace < status.Metadata.Available { 2401 status.Metadata.Available = actualSpace 2402 } 2403 } 2404 } 2405 2406 return status 2407 } 2408 2409 // Status returns the current status of this deviceset 2410 func (devices *DeviceSet) exportDeviceMetadata(hash string) (*deviceMetadata, error) { 2411 info, err := devices.lookupDeviceWithLock(hash) 2412 if err != nil { 2413 return nil, err 2414 } 2415 2416 info.lock.Lock() 2417 defer info.lock.Unlock() 2418 2419 metadata := &deviceMetadata{info.DeviceID, info.Size, info.Name()} 2420 return metadata, nil 2421 } 2422 2423 // NewDeviceSet creates the device set based on the options provided. 
2424 func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps []idtools.IDMap) (*DeviceSet, error) { 2425 devicemapper.SetDevDir("/dev") 2426 2427 devices := &DeviceSet{ 2428 root: root, 2429 metaData: metaData{Devices: make(map[string]*devInfo)}, 2430 dataLoopbackSize: defaultDataLoopbackSize, 2431 metaDataLoopbackSize: defaultMetaDataLoopbackSize, 2432 baseFsSize: defaultBaseFsSize, 2433 overrideUdevSyncCheck: defaultUdevSyncOverride, 2434 doBlkDiscard: true, 2435 thinpBlockSize: defaultThinpBlockSize, 2436 deviceIDMap: make([]byte, deviceIDMapSz), 2437 deletionWorkerTicker: time.NewTicker(time.Second * 30), 2438 uidMaps: uidMaps, 2439 gidMaps: gidMaps, 2440 } 2441 2442 foundBlkDiscard := false 2443 for _, option := range options { 2444 key, val, err := parsers.ParseKeyValueOpt(option) 2445 if err != nil { 2446 return nil, err 2447 } 2448 key = strings.ToLower(key) 2449 switch key { 2450 case "dm.basesize": 2451 size, err := units.RAMInBytes(val) 2452 if err != nil { 2453 return nil, err 2454 } 2455 userBaseSize = true 2456 devices.baseFsSize = uint64(size) 2457 case "dm.loopdatasize": 2458 size, err := units.RAMInBytes(val) 2459 if err != nil { 2460 return nil, err 2461 } 2462 devices.dataLoopbackSize = size 2463 case "dm.loopmetadatasize": 2464 size, err := units.RAMInBytes(val) 2465 if err != nil { 2466 return nil, err 2467 } 2468 devices.metaDataLoopbackSize = size 2469 case "dm.fs": 2470 if val != "ext4" && val != "xfs" { 2471 return nil, fmt.Errorf("devmapper: Unsupported filesystem %s\n", val) 2472 } 2473 devices.filesystem = val 2474 case "dm.mkfsarg": 2475 devices.mkfsArgs = append(devices.mkfsArgs, val) 2476 case "dm.mountopt": 2477 devices.mountOptions = joinMountOptions(devices.mountOptions, val) 2478 case "dm.metadatadev": 2479 devices.metadataDevice = val 2480 case "dm.datadev": 2481 devices.dataDevice = val 2482 case "dm.thinpooldev": 2483 devices.thinPoolDevice = strings.TrimPrefix(val, "/dev/mapper/") 2484 case 
"dm.blkdiscard": 2485 foundBlkDiscard = true 2486 devices.doBlkDiscard, err = strconv.ParseBool(val) 2487 if err != nil { 2488 return nil, err 2489 } 2490 case "dm.blocksize": 2491 size, err := units.RAMInBytes(val) 2492 if err != nil { 2493 return nil, err 2494 } 2495 // convert to 512b sectors 2496 devices.thinpBlockSize = uint32(size) >> 9 2497 case "dm.override_udev_sync_check": 2498 devices.overrideUdevSyncCheck, err = strconv.ParseBool(val) 2499 if err != nil { 2500 return nil, err 2501 } 2502 2503 case "dm.use_deferred_removal": 2504 enableDeferredRemoval, err = strconv.ParseBool(val) 2505 if err != nil { 2506 return nil, err 2507 } 2508 2509 case "dm.use_deferred_deletion": 2510 enableDeferredDeletion, err = strconv.ParseBool(val) 2511 if err != nil { 2512 return nil, err 2513 } 2514 2515 default: 2516 return nil, fmt.Errorf("devmapper: Unknown option %s\n", key) 2517 } 2518 } 2519 2520 // By default, don't do blk discard hack on raw devices, its rarely useful and is expensive 2521 if !foundBlkDiscard && (devices.dataDevice != "" || devices.thinPoolDevice != "") { 2522 devices.doBlkDiscard = false 2523 } 2524 2525 if err := devices.initDevmapper(doInit); err != nil { 2526 return nil, err 2527 } 2528 2529 return devices, nil 2530 }