github.com/ojongerius/docker@v1.11.2/daemon/graphdriver/devmapper/deviceset.go (about) 1 // +build linux 2 3 package devmapper 4 5 import ( 6 "bufio" 7 "encoding/json" 8 "errors" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "os" 13 "os/exec" 14 "path" 15 "path/filepath" 16 "strconv" 17 "strings" 18 "sync" 19 "syscall" 20 "time" 21 22 "github.com/Sirupsen/logrus" 23 24 "github.com/docker/docker/daemon/graphdriver" 25 "github.com/docker/docker/dockerversion" 26 "github.com/docker/docker/pkg/devicemapper" 27 "github.com/docker/docker/pkg/idtools" 28 "github.com/docker/docker/pkg/loopback" 29 "github.com/docker/docker/pkg/mount" 30 "github.com/docker/docker/pkg/parsers" 31 "github.com/docker/go-units" 32 33 "github.com/opencontainers/runc/libcontainer/label" 34 ) 35 36 var ( 37 defaultDataLoopbackSize int64 = 100 * 1024 * 1024 * 1024 38 defaultMetaDataLoopbackSize int64 = 2 * 1024 * 1024 * 1024 39 defaultBaseFsSize uint64 = 10 * 1024 * 1024 * 1024 40 defaultThinpBlockSize uint32 = 128 // 64K = 128 512b sectors 41 defaultUdevSyncOverride = false 42 maxDeviceID = 0xffffff // 24 bit, pool limit 43 deviceIDMapSz = (maxDeviceID + 1) / 8 44 // We retry device removal so many a times that even error messages 45 // will fill up console during normal operation. So only log Fatal 46 // messages by default. 
47 logLevel = devicemapper.LogLevelFatal 48 driverDeferredRemovalSupport = false 49 enableDeferredRemoval = false 50 enableDeferredDeletion = false 51 userBaseSize = false 52 defaultMinFreeSpacePercent uint32 = 10 53 ) 54 55 const deviceSetMetaFile string = "deviceset-metadata" 56 const transactionMetaFile string = "transaction-metadata" 57 58 type transaction struct { 59 OpenTransactionID uint64 `json:"open_transaction_id"` 60 DeviceIDHash string `json:"device_hash"` 61 DeviceID int `json:"device_id"` 62 } 63 64 type devInfo struct { 65 Hash string `json:"-"` 66 DeviceID int `json:"device_id"` 67 Size uint64 `json:"size"` 68 TransactionID uint64 `json:"transaction_id"` 69 Initialized bool `json:"initialized"` 70 Deleted bool `json:"deleted"` 71 devices *DeviceSet 72 73 // The global DeviceSet lock guarantees that we serialize all 74 // the calls to libdevmapper (which is not threadsafe), but we 75 // sometimes release that lock while sleeping. In that case 76 // this per-device lock is still held, protecting against 77 // other accesses to the device that we're doing the wait on. 78 // 79 // WARNING: In order to avoid AB-BA deadlocks when releasing 80 // the global lock while holding the per-device locks all 81 // device locks must be acquired *before* the device lock, and 82 // multiple device locks should be acquired parent before child. 
83 lock sync.Mutex 84 } 85 86 type metaData struct { 87 Devices map[string]*devInfo `json:"Devices"` 88 } 89 90 // DeviceSet holds information about list of devices 91 type DeviceSet struct { 92 metaData `json:"-"` 93 sync.Mutex `json:"-"` // Protects all fields of DeviceSet and serializes calls into libdevmapper 94 root string 95 devicePrefix string 96 TransactionID uint64 `json:"-"` 97 NextDeviceID int `json:"next_device_id"` 98 deviceIDMap []byte 99 100 // Options 101 dataLoopbackSize int64 102 metaDataLoopbackSize int64 103 baseFsSize uint64 104 filesystem string 105 mountOptions string 106 mkfsArgs []string 107 dataDevice string // block or loop dev 108 dataLoopFile string // loopback file, if used 109 metadataDevice string // block or loop dev 110 metadataLoopFile string // loopback file, if used 111 doBlkDiscard bool 112 thinpBlockSize uint32 113 thinPoolDevice string 114 transaction `json:"-"` 115 overrideUdevSyncCheck bool 116 deferredRemove bool // use deferred removal 117 deferredDelete bool // use deferred deletion 118 BaseDeviceUUID string // save UUID of base device 119 BaseDeviceFilesystem string // save filesystem of base device 120 nrDeletedDevices uint // number of deleted devices 121 deletionWorkerTicker *time.Ticker 122 uidMaps []idtools.IDMap 123 gidMaps []idtools.IDMap 124 minFreeSpacePercent uint32 //min free space percentage in thinpool 125 } 126 127 // DiskUsage contains information about disk usage and is used when reporting Status of a device. 128 type DiskUsage struct { 129 // Used bytes on the disk. 130 Used uint64 131 // Total bytes on the disk. 132 Total uint64 133 // Available bytes on the disk. 134 Available uint64 135 } 136 137 // Status returns the information about the device. 138 type Status struct { 139 // PoolName is the name of the data pool. 140 PoolName string 141 // DataFile is the actual block device for data. 142 DataFile string 143 // DataLoopback loopback file, if used. 
144 DataLoopback string 145 // MetadataFile is the actual block device for metadata. 146 MetadataFile string 147 // MetadataLoopback is the loopback file, if used. 148 MetadataLoopback string 149 // Data is the disk used for data. 150 Data DiskUsage 151 // Metadata is the disk used for meta data. 152 Metadata DiskUsage 153 // BaseDeviceSize is base size of container and image 154 BaseDeviceSize uint64 155 // BaseDeviceFS is backing filesystem. 156 BaseDeviceFS string 157 // SectorSize size of the vector. 158 SectorSize uint64 159 // UdevSyncSupported is true if sync is supported. 160 UdevSyncSupported bool 161 // DeferredRemoveEnabled is true then the device is not unmounted. 162 DeferredRemoveEnabled bool 163 // True if deferred deletion is enabled. This is different from 164 // deferred removal. "removal" means that device mapper device is 165 // deactivated. Thin device is still in thin pool and can be activated 166 // again. But "deletion" means that thin device will be deleted from 167 // thin pool and it can't be activated again. 168 DeferredDeleteEnabled bool 169 DeferredDeletedDeviceCount uint 170 } 171 172 // Structure used to export image/container metadata in docker inspect. 173 type deviceMetadata struct { 174 deviceID int 175 deviceSize uint64 // size in bytes 176 deviceName string // Device name as used during activation 177 } 178 179 // DevStatus returns information about device mounted containing its id, size and sector information. 180 type DevStatus struct { 181 // DeviceID is the id of the device. 182 DeviceID int 183 // Size is the size of the filesystem. 184 Size uint64 185 // TransactionID is a unique integer per device set used to identify an operation on the file system, this number is incremental. 186 TransactionID uint64 187 // SizeInSectors indicates the size of the sectors allocated. 188 SizeInSectors uint64 189 // MappedSectors indicates number of mapped sectors. 
190 MappedSectors uint64 191 // HighestMappedSector is the pointer to the highest mapped sector. 192 HighestMappedSector uint64 193 } 194 195 func getDevName(name string) string { 196 return "/dev/mapper/" + name 197 } 198 199 func (info *devInfo) Name() string { 200 hash := info.Hash 201 if hash == "" { 202 hash = "base" 203 } 204 return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash) 205 } 206 207 func (info *devInfo) DevName() string { 208 return getDevName(info.Name()) 209 } 210 211 func (devices *DeviceSet) loopbackDir() string { 212 return path.Join(devices.root, "devicemapper") 213 } 214 215 func (devices *DeviceSet) metadataDir() string { 216 return path.Join(devices.root, "metadata") 217 } 218 219 func (devices *DeviceSet) metadataFile(info *devInfo) string { 220 file := info.Hash 221 if file == "" { 222 file = "base" 223 } 224 return path.Join(devices.metadataDir(), file) 225 } 226 227 func (devices *DeviceSet) transactionMetaFile() string { 228 return path.Join(devices.metadataDir(), transactionMetaFile) 229 } 230 231 func (devices *DeviceSet) deviceSetMetaFile() string { 232 return path.Join(devices.metadataDir(), deviceSetMetaFile) 233 } 234 235 func (devices *DeviceSet) oldMetadataFile() string { 236 return path.Join(devices.loopbackDir(), "json") 237 } 238 239 func (devices *DeviceSet) getPoolName() string { 240 if devices.thinPoolDevice == "" { 241 return devices.devicePrefix + "-pool" 242 } 243 return devices.thinPoolDevice 244 } 245 246 func (devices *DeviceSet) getPoolDevName() string { 247 return getDevName(devices.getPoolName()) 248 } 249 250 func (devices *DeviceSet) hasImage(name string) bool { 251 dirname := devices.loopbackDir() 252 filename := path.Join(dirname, name) 253 254 _, err := os.Stat(filename) 255 return err == nil 256 } 257 258 // ensureImage creates a sparse file of <size> bytes at the path 259 // <root>/devicemapper/<name>. 
260 // If the file already exists and new size is larger than its current size, it grows to the new size. 261 // Either way it returns the full path. 262 func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) { 263 dirname := devices.loopbackDir() 264 filename := path.Join(dirname, name) 265 266 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 267 if err != nil { 268 return "", err 269 } 270 if err := idtools.MkdirAllAs(dirname, 0700, uid, gid); err != nil && !os.IsExist(err) { 271 return "", err 272 } 273 274 if fi, err := os.Stat(filename); err != nil { 275 if !os.IsNotExist(err) { 276 return "", err 277 } 278 logrus.Debugf("devmapper: Creating loopback file %s for device-manage use", filename) 279 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 280 if err != nil { 281 return "", err 282 } 283 defer file.Close() 284 285 if err := file.Truncate(size); err != nil { 286 return "", err 287 } 288 } else { 289 if fi.Size() < size { 290 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 291 if err != nil { 292 return "", err 293 } 294 defer file.Close() 295 if err := file.Truncate(size); err != nil { 296 return "", fmt.Errorf("devmapper: Unable to grow loopback file %s: %v", filename, err) 297 } 298 } else if fi.Size() > size { 299 logrus.Warnf("devmapper: Can't shrink loopback file %s", filename) 300 } 301 } 302 return filename, nil 303 } 304 305 func (devices *DeviceSet) allocateTransactionID() uint64 { 306 devices.OpenTransactionID = devices.TransactionID + 1 307 return devices.OpenTransactionID 308 } 309 310 func (devices *DeviceSet) updatePoolTransactionID() error { 311 if err := devicemapper.SetTransactionID(devices.getPoolDevName(), devices.TransactionID, devices.OpenTransactionID); err != nil { 312 return fmt.Errorf("devmapper: Error setting devmapper transaction ID: %s", err) 313 } 314 devices.TransactionID = devices.OpenTransactionID 315 return nil 316 } 317 318 func (devices 
*DeviceSet) removeMetadata(info *devInfo) error { 319 if err := os.RemoveAll(devices.metadataFile(info)); err != nil { 320 return fmt.Errorf("devmapper: Error removing metadata file %s: %s", devices.metadataFile(info), err) 321 } 322 return nil 323 } 324 325 // Given json data and file path, write it to disk 326 func (devices *DeviceSet) writeMetaFile(jsonData []byte, filePath string) error { 327 tmpFile, err := ioutil.TempFile(devices.metadataDir(), ".tmp") 328 if err != nil { 329 return fmt.Errorf("devmapper: Error creating metadata file: %s", err) 330 } 331 332 n, err := tmpFile.Write(jsonData) 333 if err != nil { 334 return fmt.Errorf("devmapper: Error writing metadata to %s: %s", tmpFile.Name(), err) 335 } 336 if n < len(jsonData) { 337 return io.ErrShortWrite 338 } 339 if err := tmpFile.Sync(); err != nil { 340 return fmt.Errorf("devmapper: Error syncing metadata file %s: %s", tmpFile.Name(), err) 341 } 342 if err := tmpFile.Close(); err != nil { 343 return fmt.Errorf("devmapper: Error closing metadata file %s: %s", tmpFile.Name(), err) 344 } 345 if err := os.Rename(tmpFile.Name(), filePath); err != nil { 346 return fmt.Errorf("devmapper: Error committing metadata file %s: %s", tmpFile.Name(), err) 347 } 348 349 return nil 350 } 351 352 func (devices *DeviceSet) saveMetadata(info *devInfo) error { 353 jsonData, err := json.Marshal(info) 354 if err != nil { 355 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 356 } 357 if err := devices.writeMetaFile(jsonData, devices.metadataFile(info)); err != nil { 358 return err 359 } 360 return nil 361 } 362 363 func (devices *DeviceSet) markDeviceIDUsed(deviceID int) { 364 var mask byte 365 i := deviceID % 8 366 mask = 1 << uint(i) 367 devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] | mask 368 } 369 370 func (devices *DeviceSet) markDeviceIDFree(deviceID int) { 371 var mask byte 372 i := deviceID % 8 373 mask = ^(1 << uint(i)) 374 devices.deviceIDMap[deviceID/8] = 
devices.deviceIDMap[deviceID/8] & mask 375 } 376 377 func (devices *DeviceSet) isDeviceIDFree(deviceID int) bool { 378 var mask byte 379 i := deviceID % 8 380 mask = (1 << uint(i)) 381 if (devices.deviceIDMap[deviceID/8] & mask) != 0 { 382 return false 383 } 384 return true 385 } 386 387 // Should be called with devices.Lock() held. 388 func (devices *DeviceSet) lookupDevice(hash string) (*devInfo, error) { 389 info := devices.Devices[hash] 390 if info == nil { 391 info = devices.loadMetadata(hash) 392 if info == nil { 393 return nil, fmt.Errorf("devmapper: Unknown device %s", hash) 394 } 395 396 devices.Devices[hash] = info 397 } 398 return info, nil 399 } 400 401 func (devices *DeviceSet) lookupDeviceWithLock(hash string) (*devInfo, error) { 402 devices.Lock() 403 defer devices.Unlock() 404 info, err := devices.lookupDevice(hash) 405 return info, err 406 } 407 408 // This function relies on that device hash map has been loaded in advance. 409 // Should be called with devices.Lock() held. 410 func (devices *DeviceSet) constructDeviceIDMap() { 411 logrus.Debugf("devmapper: constructDeviceIDMap()") 412 defer logrus.Debugf("devmapper: constructDeviceIDMap() END") 413 414 for _, info := range devices.Devices { 415 devices.markDeviceIDUsed(info.DeviceID) 416 logrus.Debugf("devmapper: Added deviceId=%d to DeviceIdMap", info.DeviceID) 417 } 418 } 419 420 func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo) error { 421 422 // Skip some of the meta files which are not device files. 
423 if strings.HasSuffix(finfo.Name(), ".migrated") { 424 logrus.Debugf("devmapper: Skipping file %s", path) 425 return nil 426 } 427 428 if strings.HasPrefix(finfo.Name(), ".") { 429 logrus.Debugf("devmapper: Skipping file %s", path) 430 return nil 431 } 432 433 if finfo.Name() == deviceSetMetaFile { 434 logrus.Debugf("devmapper: Skipping file %s", path) 435 return nil 436 } 437 438 if finfo.Name() == transactionMetaFile { 439 logrus.Debugf("devmapper: Skipping file %s", path) 440 return nil 441 } 442 443 logrus.Debugf("devmapper: Loading data for file %s", path) 444 445 hash := finfo.Name() 446 if hash == "base" { 447 hash = "" 448 } 449 450 // Include deleted devices also as cleanup delete device logic 451 // will go through it and see if there are any deleted devices. 452 if _, err := devices.lookupDevice(hash); err != nil { 453 return fmt.Errorf("devmapper: Error looking up device %s:%v", hash, err) 454 } 455 456 return nil 457 } 458 459 func (devices *DeviceSet) loadDeviceFilesOnStart() error { 460 logrus.Debugf("devmapper: loadDeviceFilesOnStart()") 461 defer logrus.Debugf("devmapper: loadDeviceFilesOnStart() END") 462 463 var scan = func(path string, info os.FileInfo, err error) error { 464 if err != nil { 465 logrus.Debugf("devmapper: Can't walk the file %s", path) 466 return nil 467 } 468 469 // Skip any directories 470 if info.IsDir() { 471 return nil 472 } 473 474 return devices.deviceFileWalkFunction(path, info) 475 } 476 477 return filepath.Walk(devices.metadataDir(), scan) 478 } 479 480 // Should be called with devices.Lock() held. 
481 func (devices *DeviceSet) unregisterDevice(id int, hash string) error { 482 logrus.Debugf("devmapper: unregisterDevice(%v, %v)", id, hash) 483 info := &devInfo{ 484 Hash: hash, 485 DeviceID: id, 486 } 487 488 delete(devices.Devices, hash) 489 490 if err := devices.removeMetadata(info); err != nil { 491 logrus.Debugf("devmapper: Error removing metadata: %s", err) 492 return err 493 } 494 495 return nil 496 } 497 498 // Should be called with devices.Lock() held. 499 func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, transactionID uint64) (*devInfo, error) { 500 logrus.Debugf("devmapper: registerDevice(%v, %v)", id, hash) 501 info := &devInfo{ 502 Hash: hash, 503 DeviceID: id, 504 Size: size, 505 TransactionID: transactionID, 506 Initialized: false, 507 devices: devices, 508 } 509 510 devices.Devices[hash] = info 511 512 if err := devices.saveMetadata(info); err != nil { 513 // Try to remove unused device 514 delete(devices.Devices, hash) 515 return nil, err 516 } 517 518 return info, nil 519 } 520 521 func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bool) error { 522 logrus.Debugf("devmapper: activateDeviceIfNeeded(%v)", info.Hash) 523 524 if info.Deleted && !ignoreDeleted { 525 return fmt.Errorf("devmapper: Can't activate device %v as it is marked for deletion", info.Hash) 526 } 527 528 // Make sure deferred removal on device is canceled, if one was 529 // scheduled. 
530 if err := devices.cancelDeferredRemoval(info); err != nil { 531 return fmt.Errorf("devmapper: Device Deferred Removal Cancellation Failed: %s", err) 532 } 533 534 if devinfo, _ := devicemapper.GetInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 { 535 return nil 536 } 537 538 return devicemapper.ActivateDevice(devices.getPoolDevName(), info.Name(), info.DeviceID, info.Size) 539 } 540 541 // Return true only if kernel supports xfs and mkfs.xfs is available 542 func xfsSupported() bool { 543 // Make sure mkfs.xfs is available 544 if _, err := exec.LookPath("mkfs.xfs"); err != nil { 545 return false 546 } 547 548 // Check if kernel supports xfs filesystem or not. 549 exec.Command("modprobe", "xfs").Run() 550 551 f, err := os.Open("/proc/filesystems") 552 if err != nil { 553 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 554 return false 555 } 556 defer f.Close() 557 558 s := bufio.NewScanner(f) 559 for s.Scan() { 560 if strings.HasSuffix(s.Text(), "\txfs") { 561 return true 562 } 563 } 564 565 if err := s.Err(); err != nil { 566 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 567 } 568 return false 569 } 570 571 func determineDefaultFS() string { 572 if xfsSupported() { 573 return "xfs" 574 } 575 576 logrus.Warn("devmapper: XFS is not supported in your system. Either the kernel doesn't support it or mkfs.xfs is not in your PATH. 
Defaulting to ext4 filesystem") 577 return "ext4" 578 } 579 580 func (devices *DeviceSet) createFilesystem(info *devInfo) (err error) { 581 devname := info.DevName() 582 583 args := []string{} 584 for _, arg := range devices.mkfsArgs { 585 args = append(args, arg) 586 } 587 588 args = append(args, devname) 589 590 if devices.filesystem == "" { 591 devices.filesystem = determineDefaultFS() 592 } 593 if err := devices.saveBaseDeviceFilesystem(devices.filesystem); err != nil { 594 return err 595 } 596 597 logrus.Infof("devmapper: Creating filesystem %s on device %s", devices.filesystem, info.Name()) 598 defer func() { 599 if err != nil { 600 logrus.Infof("devmapper: Error while creating filesystem %s on device %s: %v", devices.filesystem, info.Name(), err) 601 } else { 602 logrus.Infof("devmapper: Successfully created filesystem %s on device %s", devices.filesystem, info.Name()) 603 } 604 }() 605 606 switch devices.filesystem { 607 case "xfs": 608 err = exec.Command("mkfs.xfs", args...).Run() 609 case "ext4": 610 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0,lazy_journal_init=0"}, args...)...).Run() 611 if err != nil { 612 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0"}, args...)...).Run() 613 } 614 if err != nil { 615 return err 616 } 617 err = exec.Command("tune2fs", append([]string{"-c", "-1", "-i", "0"}, devname)...).Run() 618 default: 619 err = fmt.Errorf("devmapper: Unsupported filesystem type %s", devices.filesystem) 620 } 621 return 622 } 623 624 func (devices *DeviceSet) migrateOldMetaData() error { 625 // Migrate old metadata file 626 jsonData, err := ioutil.ReadFile(devices.oldMetadataFile()) 627 if err != nil && !os.IsNotExist(err) { 628 return err 629 } 630 631 if jsonData != nil { 632 m := metaData{Devices: make(map[string]*devInfo)} 633 634 if err := json.Unmarshal(jsonData, &m); err != nil { 635 return err 636 } 637 638 for hash, info := range m.Devices { 639 info.Hash = hash 
640 devices.saveMetadata(info) 641 } 642 if err := os.Rename(devices.oldMetadataFile(), devices.oldMetadataFile()+".migrated"); err != nil { 643 return err 644 } 645 646 } 647 648 return nil 649 } 650 651 // Cleanup deleted devices. It assumes that all the devices have been 652 // loaded in the hash table. 653 func (devices *DeviceSet) cleanupDeletedDevices() error { 654 devices.Lock() 655 656 // If there are no deleted devices, there is nothing to do. 657 if devices.nrDeletedDevices == 0 { 658 devices.Unlock() 659 return nil 660 } 661 662 var deletedDevices []*devInfo 663 664 for _, info := range devices.Devices { 665 if !info.Deleted { 666 continue 667 } 668 logrus.Debugf("devmapper: Found deleted device %s.", info.Hash) 669 deletedDevices = append(deletedDevices, info) 670 } 671 672 // Delete the deleted devices. DeleteDevice() first takes the info lock 673 // and then devices.Lock(). So drop it to avoid deadlock. 674 devices.Unlock() 675 676 for _, info := range deletedDevices { 677 // This will again try deferred deletion. 678 if err := devices.DeleteDevice(info.Hash, false); err != nil { 679 logrus.Warnf("devmapper: Deletion of device %s, device_id=%v failed:%v", info.Hash, info.DeviceID, err) 680 } 681 } 682 683 return nil 684 } 685 686 func (devices *DeviceSet) countDeletedDevices() { 687 for _, info := range devices.Devices { 688 if !info.Deleted { 689 continue 690 } 691 devices.nrDeletedDevices++ 692 } 693 } 694 695 func (devices *DeviceSet) startDeviceDeletionWorker() { 696 // Deferred deletion is not enabled. Don't do anything. 
697 if !devices.deferredDelete { 698 return 699 } 700 701 logrus.Debugf("devmapper: Worker to cleanup deleted devices started") 702 for range devices.deletionWorkerTicker.C { 703 devices.cleanupDeletedDevices() 704 } 705 } 706 707 func (devices *DeviceSet) initMetaData() error { 708 devices.Lock() 709 defer devices.Unlock() 710 711 if err := devices.migrateOldMetaData(); err != nil { 712 return err 713 } 714 715 _, transactionID, _, _, _, _, err := devices.poolStatus() 716 if err != nil { 717 return err 718 } 719 720 devices.TransactionID = transactionID 721 722 if err := devices.loadDeviceFilesOnStart(); err != nil { 723 return fmt.Errorf("devmapper: Failed to load device files:%v", err) 724 } 725 726 devices.constructDeviceIDMap() 727 devices.countDeletedDevices() 728 729 if err := devices.processPendingTransaction(); err != nil { 730 return err 731 } 732 733 // Start a goroutine to cleanup Deleted Devices 734 go devices.startDeviceDeletionWorker() 735 return nil 736 } 737 738 func (devices *DeviceSet) incNextDeviceID() { 739 // IDs are 24bit, so wrap around 740 devices.NextDeviceID = (devices.NextDeviceID + 1) & maxDeviceID 741 } 742 743 func (devices *DeviceSet) getNextFreeDeviceID() (int, error) { 744 devices.incNextDeviceID() 745 for i := 0; i <= maxDeviceID; i++ { 746 if devices.isDeviceIDFree(devices.NextDeviceID) { 747 devices.markDeviceIDUsed(devices.NextDeviceID) 748 return devices.NextDeviceID, nil 749 } 750 devices.incNextDeviceID() 751 } 752 753 return 0, fmt.Errorf("devmapper: Unable to find a free device ID") 754 } 755 756 func (devices *DeviceSet) poolHasFreeSpace() error { 757 if devices.minFreeSpacePercent == 0 { 758 return nil 759 } 760 761 _, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 762 if err != nil { 763 return err 764 } 765 766 minFreeData := (dataTotal * uint64(devices.minFreeSpacePercent)) / 100 767 if minFreeData < 1 { 768 minFreeData = 1 769 } 770 dataFree := dataTotal - dataUsed 771 if dataFree < 
minFreeData { 772 return fmt.Errorf("devmapper: Thin Pool has %v free data blocks which is less than minimum required %v free data blocks. Create more free space in thin pool or use dm.min_free_space option to change behavior", (dataTotal - dataUsed), minFreeData) 773 } 774 775 minFreeMetadata := (metadataTotal * uint64(devices.minFreeSpacePercent)) / 100 776 if minFreeMetadata < 1 { 777 minFreeMetadata = 1 778 } 779 780 metadataFree := metadataTotal - metadataUsed 781 if metadataFree < minFreeMetadata { 782 return fmt.Errorf("devmapper: Thin Pool has %v free metadata blocks which is less than minimum required %v free metadata blocks. Create more free metadata space in thin pool or use dm.min_free_space option to change behavior", (metadataTotal - metadataUsed), minFreeMetadata) 783 } 784 785 return nil 786 } 787 788 func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) { 789 devices.Lock() 790 defer devices.Unlock() 791 792 deviceID, err := devices.getNextFreeDeviceID() 793 if err != nil { 794 return nil, err 795 } 796 797 if err := devices.openTransaction(hash, deviceID); err != nil { 798 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 799 devices.markDeviceIDFree(deviceID) 800 return nil, err 801 } 802 803 for { 804 if err := devicemapper.CreateDevice(devices.getPoolDevName(), deviceID); err != nil { 805 if devicemapper.DeviceIDExists(err) { 806 // Device ID already exists. This should not 807 // happen. Now we have a mechanism to find 808 // a free device ID. So something is not right. 809 // Give a warning and continue. 
810 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 811 deviceID, err = devices.getNextFreeDeviceID() 812 if err != nil { 813 return nil, err 814 } 815 // Save new device id into transaction 816 devices.refreshTransaction(deviceID) 817 continue 818 } 819 logrus.Debugf("devmapper: Error creating device: %s", err) 820 devices.markDeviceIDFree(deviceID) 821 return nil, err 822 } 823 break 824 } 825 826 logrus.Debugf("devmapper: Registering device (id %v) with FS size %v", deviceID, devices.baseFsSize) 827 info, err := devices.registerDevice(deviceID, hash, devices.baseFsSize, devices.OpenTransactionID) 828 if err != nil { 829 _ = devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 830 devices.markDeviceIDFree(deviceID) 831 return nil, err 832 } 833 834 if err := devices.closeTransaction(); err != nil { 835 devices.unregisterDevice(deviceID, hash) 836 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 837 devices.markDeviceIDFree(deviceID) 838 return nil, err 839 } 840 return info, nil 841 } 842 843 func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo) error { 844 if err := devices.poolHasFreeSpace(); err != nil { 845 return err 846 } 847 848 deviceID, err := devices.getNextFreeDeviceID() 849 if err != nil { 850 return err 851 } 852 853 if err := devices.openTransaction(hash, deviceID); err != nil { 854 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 855 devices.markDeviceIDFree(deviceID) 856 return err 857 } 858 859 for { 860 if err := devicemapper.CreateSnapDevice(devices.getPoolDevName(), deviceID, baseInfo.Name(), baseInfo.DeviceID); err != nil { 861 if devicemapper.DeviceIDExists(err) { 862 // Device ID already exists. This should not 863 // happen. Now we have a mechanism to find 864 // a free device ID. So something is not right. 865 // Give a warning and continue. 
866 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 867 deviceID, err = devices.getNextFreeDeviceID() 868 if err != nil { 869 return err 870 } 871 // Save new device id into transaction 872 devices.refreshTransaction(deviceID) 873 continue 874 } 875 logrus.Debugf("devmapper: Error creating snap device: %s", err) 876 devices.markDeviceIDFree(deviceID) 877 return err 878 } 879 break 880 } 881 882 if _, err := devices.registerDevice(deviceID, hash, baseInfo.Size, devices.OpenTransactionID); err != nil { 883 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 884 devices.markDeviceIDFree(deviceID) 885 logrus.Debugf("devmapper: Error registering device: %s", err) 886 return err 887 } 888 889 if err := devices.closeTransaction(); err != nil { 890 devices.unregisterDevice(deviceID, hash) 891 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 892 devices.markDeviceIDFree(deviceID) 893 return err 894 } 895 return nil 896 } 897 898 func (devices *DeviceSet) loadMetadata(hash string) *devInfo { 899 info := &devInfo{Hash: hash, devices: devices} 900 901 jsonData, err := ioutil.ReadFile(devices.metadataFile(info)) 902 if err != nil { 903 return nil 904 } 905 906 if err := json.Unmarshal(jsonData, &info); err != nil { 907 return nil 908 } 909 910 if info.DeviceID > maxDeviceID { 911 logrus.Errorf("devmapper: Ignoring Invalid DeviceId=%d", info.DeviceID) 912 return nil 913 } 914 915 return info 916 } 917 918 func getDeviceUUID(device string) (string, error) { 919 out, err := exec.Command("blkid", "-s", "UUID", "-o", "value", device).Output() 920 if err != nil { 921 return "", fmt.Errorf("devmapper: Failed to find uuid for device %s:%v", device, err) 922 } 923 924 uuid := strings.TrimSuffix(string(out), "\n") 925 uuid = strings.TrimSpace(uuid) 926 logrus.Debugf("devmapper: UUID for device: %s is:%s", device, uuid) 927 return uuid, nil 928 } 929 930 func (devices *DeviceSet) getBaseDeviceSize() uint64 { 931 
info, _ := devices.lookupDevice("") 932 if info == nil { 933 return 0 934 } 935 return info.Size 936 } 937 938 func (devices *DeviceSet) getBaseDeviceFS() string { 939 return devices.BaseDeviceFilesystem 940 } 941 942 func (devices *DeviceSet) verifyBaseDeviceUUIDFS(baseInfo *devInfo) error { 943 devices.Lock() 944 defer devices.Unlock() 945 946 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 947 return err 948 } 949 defer devices.deactivateDevice(baseInfo) 950 951 uuid, err := getDeviceUUID(baseInfo.DevName()) 952 if err != nil { 953 return err 954 } 955 956 if devices.BaseDeviceUUID != uuid { 957 return fmt.Errorf("devmapper: Current Base Device UUID:%s does not match with stored UUID:%s. Possibly using a different thin pool than last invocation", uuid, devices.BaseDeviceUUID) 958 } 959 960 if devices.BaseDeviceFilesystem == "" { 961 fsType, err := ProbeFsType(baseInfo.DevName()) 962 if err != nil { 963 return err 964 } 965 if err := devices.saveBaseDeviceFilesystem(fsType); err != nil { 966 return err 967 } 968 } 969 970 // If user specified a filesystem using dm.fs option and current 971 // file system of base image is not same, warn user that dm.fs 972 // will be ignored. 973 if devices.BaseDeviceFilesystem != devices.filesystem { 974 logrus.Warnf("devmapper: Base device already exists and has filesystem %s on it. 
User specified filesystem %s will be ignored.", devices.BaseDeviceFilesystem, devices.filesystem) 975 devices.filesystem = devices.BaseDeviceFilesystem 976 } 977 return nil 978 } 979 980 func (devices *DeviceSet) saveBaseDeviceFilesystem(fs string) error { 981 devices.BaseDeviceFilesystem = fs 982 return devices.saveDeviceSetMetaData() 983 } 984 985 func (devices *DeviceSet) saveBaseDeviceUUID(baseInfo *devInfo) error { 986 devices.Lock() 987 defer devices.Unlock() 988 989 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 990 return err 991 } 992 defer devices.deactivateDevice(baseInfo) 993 994 uuid, err := getDeviceUUID(baseInfo.DevName()) 995 if err != nil { 996 return err 997 } 998 999 devices.BaseDeviceUUID = uuid 1000 return devices.saveDeviceSetMetaData() 1001 } 1002 1003 func (devices *DeviceSet) createBaseImage() error { 1004 logrus.Debugf("devmapper: Initializing base device-mapper thin volume") 1005 1006 // Create initial device 1007 info, err := devices.createRegisterDevice("") 1008 if err != nil { 1009 return err 1010 } 1011 1012 logrus.Debugf("devmapper: Creating filesystem on base device-mapper thin volume") 1013 1014 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 1015 return err 1016 } 1017 1018 if err := devices.createFilesystem(info); err != nil { 1019 return err 1020 } 1021 1022 info.Initialized = true 1023 if err := devices.saveMetadata(info); err != nil { 1024 info.Initialized = false 1025 return err 1026 } 1027 1028 if err := devices.saveBaseDeviceUUID(info); err != nil { 1029 return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err) 1030 } 1031 1032 return nil 1033 } 1034 1035 // Returns if thin pool device exists or not. If device exists, also makes 1036 // sure it is a thin pool device and not some other type of device. 
1037 func (devices *DeviceSet) thinPoolExists(thinPoolDevice string) (bool, error) { 1038 logrus.Debugf("devmapper: Checking for existence of the pool %s", thinPoolDevice) 1039 1040 info, err := devicemapper.GetInfo(thinPoolDevice) 1041 if err != nil { 1042 return false, fmt.Errorf("devmapper: GetInfo() on device %s failed: %v", thinPoolDevice, err) 1043 } 1044 1045 // Device does not exist. 1046 if info.Exists == 0 { 1047 return false, nil 1048 } 1049 1050 _, _, deviceType, _, err := devicemapper.GetStatus(thinPoolDevice) 1051 if err != nil { 1052 return false, fmt.Errorf("devmapper: GetStatus() on device %s failed: %v", thinPoolDevice, err) 1053 } 1054 1055 if deviceType != "thin-pool" { 1056 return false, fmt.Errorf("devmapper: Device %s is not a thin pool", thinPoolDevice) 1057 } 1058 1059 return true, nil 1060 } 1061 1062 func (devices *DeviceSet) checkThinPool() error { 1063 _, transactionID, dataUsed, _, _, _, err := devices.poolStatus() 1064 if err != nil { 1065 return err 1066 } 1067 if dataUsed != 0 { 1068 return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) that already has used data blocks", 1069 devices.thinPoolDevice) 1070 } 1071 if transactionID != 0 { 1072 return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) with non-zero transaction ID", 1073 devices.thinPoolDevice) 1074 } 1075 return nil 1076 } 1077 1078 // Base image is initialized properly. Either save UUID for first time (for 1079 // upgrade case or verify UUID. 1080 func (devices *DeviceSet) setupVerifyBaseImageUUIDFS(baseInfo *devInfo) error { 1081 // If BaseDeviceUUID is nil (upgrade case), save it and return success. 
1082 if devices.BaseDeviceUUID == "" { 1083 if err := devices.saveBaseDeviceUUID(baseInfo); err != nil { 1084 return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err) 1085 } 1086 return nil 1087 } 1088 1089 if err := devices.verifyBaseDeviceUUIDFS(baseInfo); err != nil { 1090 return fmt.Errorf("devmapper: Base Device UUID and Filesystem verification failed.%v", err) 1091 } 1092 1093 return nil 1094 } 1095 1096 func (devices *DeviceSet) checkGrowBaseDeviceFS(info *devInfo) error { 1097 1098 if !userBaseSize { 1099 return nil 1100 } 1101 1102 if devices.baseFsSize < devices.getBaseDeviceSize() { 1103 return fmt.Errorf("devmapper: Base device size cannot be smaller than %s", units.HumanSize(float64(devices.getBaseDeviceSize()))) 1104 } 1105 1106 if devices.baseFsSize == devices.getBaseDeviceSize() { 1107 return nil 1108 } 1109 1110 info.lock.Lock() 1111 defer info.lock.Unlock() 1112 1113 devices.Lock() 1114 defer devices.Unlock() 1115 1116 info.Size = devices.baseFsSize 1117 1118 if err := devices.saveMetadata(info); err != nil { 1119 // Try to remove unused device 1120 delete(devices.Devices, info.Hash) 1121 return err 1122 } 1123 1124 return devices.growFS(info) 1125 } 1126 1127 func (devices *DeviceSet) growFS(info *devInfo) error { 1128 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 1129 return fmt.Errorf("Error activating devmapper device: %s", err) 1130 } 1131 1132 defer devices.deactivateDevice(info) 1133 1134 fsMountPoint := "/run/docker/mnt" 1135 if _, err := os.Stat(fsMountPoint); os.IsNotExist(err) { 1136 if err := os.MkdirAll(fsMountPoint, 0700); err != nil { 1137 return err 1138 } 1139 defer os.RemoveAll(fsMountPoint) 1140 } 1141 1142 options := "" 1143 if devices.BaseDeviceFilesystem == "xfs" { 1144 // XFS needs nouuid or it can't mount filesystems with the same fs 1145 options = joinMountOptions(options, "nouuid") 1146 } 1147 options = joinMountOptions(options, devices.mountOptions) 1148 1149 if err := 
mount.Mount(info.DevName(), fsMountPoint, devices.BaseDeviceFilesystem, options); err != nil { 1150 return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), fsMountPoint, err) 1151 } 1152 1153 defer syscall.Unmount(fsMountPoint, syscall.MNT_DETACH) 1154 1155 switch devices.BaseDeviceFilesystem { 1156 case "ext4": 1157 if out, err := exec.Command("resize2fs", info.DevName()).CombinedOutput(); err != nil { 1158 return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out)) 1159 } 1160 case "xfs": 1161 if out, err := exec.Command("xfs_growfs", info.DevName()).CombinedOutput(); err != nil { 1162 return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out)) 1163 } 1164 default: 1165 return fmt.Errorf("Unsupported filesystem type %s", devices.BaseDeviceFilesystem) 1166 } 1167 return nil 1168 } 1169 1170 func (devices *DeviceSet) setupBaseImage() error { 1171 oldInfo, _ := devices.lookupDeviceWithLock("") 1172 1173 // base image already exists. If it is initialized properly, do UUID 1174 // verification and return. Otherwise remove image and set it up 1175 // fresh. 1176 1177 if oldInfo != nil { 1178 if oldInfo.Initialized && !oldInfo.Deleted { 1179 if err := devices.setupVerifyBaseImageUUIDFS(oldInfo); err != nil { 1180 return err 1181 } 1182 1183 if err := devices.checkGrowBaseDeviceFS(oldInfo); err != nil { 1184 return err 1185 } 1186 1187 return nil 1188 } 1189 1190 logrus.Debugf("devmapper: Removing uninitialized base image") 1191 // If previous base device is in deferred delete state, 1192 // that needs to be cleaned up first. So don't try 1193 // deferred deletion. 1194 if err := devices.DeleteDevice("", true); err != nil { 1195 return err 1196 } 1197 } 1198 1199 // If we are setting up base image for the first time, make sure 1200 // thin pool is empty. 
1201 if devices.thinPoolDevice != "" && oldInfo == nil { 1202 if err := devices.checkThinPool(); err != nil { 1203 return err 1204 } 1205 } 1206 1207 // Create new base image device 1208 if err := devices.createBaseImage(); err != nil { 1209 return err 1210 } 1211 1212 return nil 1213 } 1214 1215 func setCloseOnExec(name string) { 1216 if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil { 1217 for _, i := range fileInfos { 1218 link, _ := os.Readlink(filepath.Join("/proc/self/fd", i.Name())) 1219 if link == name { 1220 fd, err := strconv.Atoi(i.Name()) 1221 if err == nil { 1222 syscall.CloseOnExec(fd) 1223 } 1224 } 1225 } 1226 } 1227 } 1228 1229 // DMLog implements logging using DevMapperLogger interface. 1230 func (devices *DeviceSet) DMLog(level int, file string, line int, dmError int, message string) { 1231 // By default libdm sends us all the messages including debug ones. 1232 // We need to filter out messages here and figure out which one 1233 // should be printed. 1234 if level > logLevel { 1235 return 1236 } 1237 1238 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 1239 if level <= devicemapper.LogLevelErr { 1240 logrus.Errorf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1241 } else if level <= devicemapper.LogLevelInfo { 1242 logrus.Infof("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1243 } else { 1244 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 1245 logrus.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1246 } 1247 } 1248 1249 func major(device uint64) uint64 { 1250 return (device >> 8) & 0xfff 1251 } 1252 1253 func minor(device uint64) uint64 { 1254 return (device & 0xff) | ((device >> 12) & 0xfff00) 1255 } 1256 1257 // ResizePool increases the size of the pool. 
1258 func (devices *DeviceSet) ResizePool(size int64) error { 1259 dirname := devices.loopbackDir() 1260 datafilename := path.Join(dirname, "data") 1261 if len(devices.dataDevice) > 0 { 1262 datafilename = devices.dataDevice 1263 } 1264 metadatafilename := path.Join(dirname, "metadata") 1265 if len(devices.metadataDevice) > 0 { 1266 metadatafilename = devices.metadataDevice 1267 } 1268 1269 datafile, err := os.OpenFile(datafilename, os.O_RDWR, 0) 1270 if datafile == nil { 1271 return err 1272 } 1273 defer datafile.Close() 1274 1275 fi, err := datafile.Stat() 1276 if fi == nil { 1277 return err 1278 } 1279 1280 if fi.Size() > size { 1281 return fmt.Errorf("devmapper: Can't shrink file") 1282 } 1283 1284 dataloopback := loopback.FindLoopDeviceFor(datafile) 1285 if dataloopback == nil { 1286 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", datafilename) 1287 } 1288 defer dataloopback.Close() 1289 1290 metadatafile, err := os.OpenFile(metadatafilename, os.O_RDWR, 0) 1291 if metadatafile == nil { 1292 return err 1293 } 1294 defer metadatafile.Close() 1295 1296 metadataloopback := loopback.FindLoopDeviceFor(metadatafile) 1297 if metadataloopback == nil { 1298 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", metadatafilename) 1299 } 1300 defer metadataloopback.Close() 1301 1302 // Grow loopback file 1303 if err := datafile.Truncate(size); err != nil { 1304 return fmt.Errorf("devmapper: Unable to grow loopback file: %s", err) 1305 } 1306 1307 // Reload size for loopback device 1308 if err := loopback.SetCapacity(dataloopback); err != nil { 1309 return fmt.Errorf("Unable to update loopback capacity: %s", err) 1310 } 1311 1312 // Suspend the pool 1313 if err := devicemapper.SuspendDevice(devices.getPoolName()); err != nil { 1314 return fmt.Errorf("devmapper: Unable to suspend pool: %s", err) 1315 } 1316 1317 // Reload with the new block sizes 1318 if err := devicemapper.ReloadPool(devices.getPoolName(), dataloopback, 
metadataloopback, devices.thinpBlockSize); err != nil { 1319 return fmt.Errorf("devmapper: Unable to reload pool: %s", err) 1320 } 1321 1322 // Resume the pool 1323 if err := devicemapper.ResumeDevice(devices.getPoolName()); err != nil { 1324 return fmt.Errorf("devmapper: Unable to resume pool: %s", err) 1325 } 1326 1327 return nil 1328 } 1329 1330 func (devices *DeviceSet) loadTransactionMetaData() error { 1331 jsonData, err := ioutil.ReadFile(devices.transactionMetaFile()) 1332 if err != nil { 1333 // There is no active transaction. This will be the case 1334 // during upgrade. 1335 if os.IsNotExist(err) { 1336 devices.OpenTransactionID = devices.TransactionID 1337 return nil 1338 } 1339 return err 1340 } 1341 1342 json.Unmarshal(jsonData, &devices.transaction) 1343 return nil 1344 } 1345 1346 func (devices *DeviceSet) saveTransactionMetaData() error { 1347 jsonData, err := json.Marshal(&devices.transaction) 1348 if err != nil { 1349 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1350 } 1351 1352 return devices.writeMetaFile(jsonData, devices.transactionMetaFile()) 1353 } 1354 1355 func (devices *DeviceSet) removeTransactionMetaData() error { 1356 if err := os.RemoveAll(devices.transactionMetaFile()); err != nil { 1357 return err 1358 } 1359 return nil 1360 } 1361 1362 func (devices *DeviceSet) rollbackTransaction() error { 1363 logrus.Debugf("devmapper: Rolling back open transaction: TransactionID=%d hash=%s device_id=%d", devices.OpenTransactionID, devices.DeviceIDHash, devices.DeviceID) 1364 1365 // A device id might have already been deleted before transaction 1366 // closed. In that case this call will fail. Just leave a message 1367 // in case of failure. 
1368 if err := devicemapper.DeleteDevice(devices.getPoolDevName(), devices.DeviceID); err != nil { 1369 logrus.Errorf("devmapper: Unable to delete device: %s", err) 1370 } 1371 1372 dinfo := &devInfo{Hash: devices.DeviceIDHash} 1373 if err := devices.removeMetadata(dinfo); err != nil { 1374 logrus.Errorf("devmapper: Unable to remove metadata: %s", err) 1375 } else { 1376 devices.markDeviceIDFree(devices.DeviceID) 1377 } 1378 1379 if err := devices.removeTransactionMetaData(); err != nil { 1380 logrus.Errorf("devmapper: Unable to remove transaction meta file %s: %s", devices.transactionMetaFile(), err) 1381 } 1382 1383 return nil 1384 } 1385 1386 func (devices *DeviceSet) processPendingTransaction() error { 1387 if err := devices.loadTransactionMetaData(); err != nil { 1388 return err 1389 } 1390 1391 // If there was open transaction but pool transaction ID is same 1392 // as open transaction ID, nothing to roll back. 1393 if devices.TransactionID == devices.OpenTransactionID { 1394 return nil 1395 } 1396 1397 // If open transaction ID is less than pool transaction ID, something 1398 // is wrong. Bail out. 1399 if devices.OpenTransactionID < devices.TransactionID { 1400 logrus.Errorf("devmapper: Open Transaction id %d is less than pool transaction id %d", devices.OpenTransactionID, devices.TransactionID) 1401 return nil 1402 } 1403 1404 // Pool transaction ID is not same as open transaction. There is 1405 // a transaction which was not completed. 1406 if err := devices.rollbackTransaction(); err != nil { 1407 return fmt.Errorf("devmapper: Rolling back open transaction failed: %s", err) 1408 } 1409 1410 devices.OpenTransactionID = devices.TransactionID 1411 return nil 1412 } 1413 1414 func (devices *DeviceSet) loadDeviceSetMetaData() error { 1415 jsonData, err := ioutil.ReadFile(devices.deviceSetMetaFile()) 1416 if err != nil { 1417 // For backward compatibility return success if file does 1418 // not exist. 
1419 if os.IsNotExist(err) { 1420 return nil 1421 } 1422 return err 1423 } 1424 1425 return json.Unmarshal(jsonData, devices) 1426 } 1427 1428 func (devices *DeviceSet) saveDeviceSetMetaData() error { 1429 jsonData, err := json.Marshal(devices) 1430 if err != nil { 1431 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1432 } 1433 1434 return devices.writeMetaFile(jsonData, devices.deviceSetMetaFile()) 1435 } 1436 1437 func (devices *DeviceSet) openTransaction(hash string, DeviceID int) error { 1438 devices.allocateTransactionID() 1439 devices.DeviceIDHash = hash 1440 devices.DeviceID = DeviceID 1441 if err := devices.saveTransactionMetaData(); err != nil { 1442 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1443 } 1444 return nil 1445 } 1446 1447 func (devices *DeviceSet) refreshTransaction(DeviceID int) error { 1448 devices.DeviceID = DeviceID 1449 if err := devices.saveTransactionMetaData(); err != nil { 1450 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1451 } 1452 return nil 1453 } 1454 1455 func (devices *DeviceSet) closeTransaction() error { 1456 if err := devices.updatePoolTransactionID(); err != nil { 1457 logrus.Debugf("devmapper: Failed to close Transaction") 1458 return err 1459 } 1460 return nil 1461 } 1462 1463 func determineDriverCapabilities(version string) error { 1464 /* 1465 * Driver version 4.27.0 and greater support deferred activation 1466 * feature. 
1467 */ 1468 1469 logrus.Debugf("devicemapper: driver version is %s", version) 1470 1471 versionSplit := strings.Split(version, ".") 1472 major, err := strconv.Atoi(versionSplit[0]) 1473 if err != nil { 1474 return graphdriver.ErrNotSupported 1475 } 1476 1477 if major > 4 { 1478 driverDeferredRemovalSupport = true 1479 return nil 1480 } 1481 1482 if major < 4 { 1483 return nil 1484 } 1485 1486 minor, err := strconv.Atoi(versionSplit[1]) 1487 if err != nil { 1488 return graphdriver.ErrNotSupported 1489 } 1490 1491 /* 1492 * If major is 4 and minor is 27, then there is no need to 1493 * check for patch level as it can not be less than 0. 1494 */ 1495 if minor >= 27 { 1496 driverDeferredRemovalSupport = true 1497 return nil 1498 } 1499 1500 return nil 1501 } 1502 1503 // Determine the major and minor number of loopback device 1504 func getDeviceMajorMinor(file *os.File) (uint64, uint64, error) { 1505 stat, err := file.Stat() 1506 if err != nil { 1507 return 0, 0, err 1508 } 1509 1510 dev := stat.Sys().(*syscall.Stat_t).Rdev 1511 majorNum := major(dev) 1512 minorNum := minor(dev) 1513 1514 logrus.Debugf("devmapper: Major:Minor for device: %s is:%v:%v", file.Name(), majorNum, minorNum) 1515 return majorNum, minorNum, nil 1516 } 1517 1518 // Given a file which is backing file of a loop back device, find the 1519 // loopback device name and its major/minor number. 
1520 func getLoopFileDeviceMajMin(filename string) (string, uint64, uint64, error) { 1521 file, err := os.Open(filename) 1522 if err != nil { 1523 logrus.Debugf("devmapper: Failed to open file %s", filename) 1524 return "", 0, 0, err 1525 } 1526 1527 defer file.Close() 1528 loopbackDevice := loopback.FindLoopDeviceFor(file) 1529 if loopbackDevice == nil { 1530 return "", 0, 0, fmt.Errorf("devmapper: Unable to find loopback mount for: %s", filename) 1531 } 1532 defer loopbackDevice.Close() 1533 1534 Major, Minor, err := getDeviceMajorMinor(loopbackDevice) 1535 if err != nil { 1536 return "", 0, 0, err 1537 } 1538 return loopbackDevice.Name(), Major, Minor, nil 1539 } 1540 1541 // Get the major/minor numbers of thin pool data and metadata devices 1542 func (devices *DeviceSet) getThinPoolDataMetaMajMin() (uint64, uint64, uint64, uint64, error) { 1543 var params, poolDataMajMin, poolMetadataMajMin string 1544 1545 _, _, _, params, err := devicemapper.GetTable(devices.getPoolName()) 1546 if err != nil { 1547 return 0, 0, 0, 0, err 1548 } 1549 1550 if _, err = fmt.Sscanf(params, "%s %s", &poolMetadataMajMin, &poolDataMajMin); err != nil { 1551 return 0, 0, 0, 0, err 1552 } 1553 1554 logrus.Debugf("devmapper: poolDataMajMin=%s poolMetaMajMin=%s\n", poolDataMajMin, poolMetadataMajMin) 1555 1556 poolDataMajMinorSplit := strings.Split(poolDataMajMin, ":") 1557 poolDataMajor, err := strconv.ParseUint(poolDataMajMinorSplit[0], 10, 32) 1558 if err != nil { 1559 return 0, 0, 0, 0, err 1560 } 1561 1562 poolDataMinor, err := strconv.ParseUint(poolDataMajMinorSplit[1], 10, 32) 1563 if err != nil { 1564 return 0, 0, 0, 0, err 1565 } 1566 1567 poolMetadataMajMinorSplit := strings.Split(poolMetadataMajMin, ":") 1568 poolMetadataMajor, err := strconv.ParseUint(poolMetadataMajMinorSplit[0], 10, 32) 1569 if err != nil { 1570 return 0, 0, 0, 0, err 1571 } 1572 1573 poolMetadataMinor, err := strconv.ParseUint(poolMetadataMajMinorSplit[1], 10, 32) 1574 if err != nil { 1575 return 0, 0, 0, 
0, err 1576 } 1577 1578 return poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, nil 1579 } 1580 1581 func (devices *DeviceSet) loadThinPoolLoopBackInfo() error { 1582 poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, err := devices.getThinPoolDataMetaMajMin() 1583 if err != nil { 1584 return err 1585 } 1586 1587 dirname := devices.loopbackDir() 1588 1589 // data device has not been passed in. So there should be a data file 1590 // which is being mounted as loop device. 1591 if devices.dataDevice == "" { 1592 datafilename := path.Join(dirname, "data") 1593 dataLoopDevice, dataMajor, dataMinor, err := getLoopFileDeviceMajMin(datafilename) 1594 if err != nil { 1595 return err 1596 } 1597 1598 // Compare the two 1599 if poolDataMajor == dataMajor && poolDataMinor == dataMinor { 1600 devices.dataDevice = dataLoopDevice 1601 devices.dataLoopFile = datafilename 1602 } 1603 1604 } 1605 1606 // metadata device has not been passed in. So there should be a 1607 // metadata file which is being mounted as loop device. 
1608 if devices.metadataDevice == "" { 1609 metadatafilename := path.Join(dirname, "metadata") 1610 metadataLoopDevice, metadataMajor, metadataMinor, err := getLoopFileDeviceMajMin(metadatafilename) 1611 if err != nil { 1612 return err 1613 } 1614 if poolMetadataMajor == metadataMajor && poolMetadataMinor == metadataMinor { 1615 devices.metadataDevice = metadataLoopDevice 1616 devices.metadataLoopFile = metadatafilename 1617 } 1618 } 1619 1620 return nil 1621 } 1622 1623 func (devices *DeviceSet) initDevmapper(doInit bool) error { 1624 // give ourselves to libdm as a log handler 1625 devicemapper.LogInit(devices) 1626 1627 version, err := devicemapper.GetDriverVersion() 1628 if err != nil { 1629 // Can't even get driver version, assume not supported 1630 return graphdriver.ErrNotSupported 1631 } 1632 1633 if err := determineDriverCapabilities(version); err != nil { 1634 return graphdriver.ErrNotSupported 1635 } 1636 1637 // If user asked for deferred removal then check both libdm library 1638 // and kernel driver support deferred removal otherwise error out. 1639 if enableDeferredRemoval { 1640 if !driverDeferredRemovalSupport { 1641 return fmt.Errorf("devmapper: Deferred removal can not be enabled as kernel does not support it") 1642 } 1643 if !devicemapper.LibraryDeferredRemovalSupport { 1644 return fmt.Errorf("devmapper: Deferred removal can not be enabled as libdm does not support it") 1645 } 1646 logrus.Debugf("devmapper: Deferred removal support enabled.") 1647 devices.deferredRemove = true 1648 } 1649 1650 if enableDeferredDeletion { 1651 if !devices.deferredRemove { 1652 return fmt.Errorf("devmapper: Deferred deletion can not be enabled as deferred removal is not enabled. 
Enable deferred removal using --storage-opt dm.use_deferred_removal=true parameter") 1653 } 1654 logrus.Debugf("devmapper: Deferred deletion support enabled.") 1655 devices.deferredDelete = true 1656 } 1657 1658 // https://github.com/docker/docker/issues/4036 1659 if supported := devicemapper.UdevSetSyncSupport(true); !supported { 1660 if dockerversion.IAmStatic == "true" { 1661 logrus.Errorf("devmapper: Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a dynamic binary to use devicemapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/daemon/#daemon-storage-driver-option") 1662 } else { 1663 logrus.Errorf("devmapper: Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a more recent version of libdevmapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/daemon/#daemon-storage-driver-option") 1664 } 1665 1666 if !devices.overrideUdevSyncCheck { 1667 return graphdriver.ErrNotSupported 1668 } 1669 } 1670 1671 //create the root dir of the devmapper driver ownership to match this 1672 //daemon's remapped root uid/gid so containers can start properly 1673 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 1674 if err != nil { 1675 return err 1676 } 1677 if err := idtools.MkdirAs(devices.root, 0700, uid, gid); err != nil && !os.IsExist(err) { 1678 return err 1679 } 1680 if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil && !os.IsExist(err) { 1681 return err 1682 } 1683 1684 // Set the device prefix from the device id and inode of the docker root dir 1685 1686 st, err := os.Stat(devices.root) 1687 if err != nil { 1688 return fmt.Errorf("devmapper: Error looking up dir %s: %s", devices.root, err) 1689 } 1690 sysSt := st.Sys().(*syscall.Stat_t) 1691 // "reg-" stands for "regular file". 
1692 // In the future we might use "dev-" for "device file", etc. 1693 // docker-maj,min[-inode] stands for: 1694 // - Managed by docker 1695 // - The target of this device is at major <maj> and minor <min> 1696 // - If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself. 1697 devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino) 1698 logrus.Debugf("devmapper: Generated prefix: %s", devices.devicePrefix) 1699 1700 // Check for the existence of the thin-pool device 1701 poolExists, err := devices.thinPoolExists(devices.getPoolName()) 1702 if err != nil { 1703 return err 1704 } 1705 1706 // It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files 1707 // that are not Close-on-exec, 1708 // so we add this badhack to make sure it closes itself 1709 setCloseOnExec("/dev/mapper/control") 1710 1711 // Make sure the sparse images exist in <root>/devicemapper/data and 1712 // <root>/devicemapper/metadata 1713 1714 createdLoopback := false 1715 1716 // If the pool doesn't exist, create it 1717 if !poolExists && devices.thinPoolDevice == "" { 1718 logrus.Debugf("devmapper: Pool doesn't exist. 
Creating it.") 1719 1720 var ( 1721 dataFile *os.File 1722 metadataFile *os.File 1723 ) 1724 1725 if devices.dataDevice == "" { 1726 // Make sure the sparse images exist in <root>/devicemapper/data 1727 1728 hasData := devices.hasImage("data") 1729 1730 if !doInit && !hasData { 1731 return errors.New("Loopback data file not found") 1732 } 1733 1734 if !hasData { 1735 createdLoopback = true 1736 } 1737 1738 data, err := devices.ensureImage("data", devices.dataLoopbackSize) 1739 if err != nil { 1740 logrus.Debugf("devmapper: Error device ensureImage (data): %s", err) 1741 return err 1742 } 1743 1744 dataFile, err = loopback.AttachLoopDevice(data) 1745 if err != nil { 1746 return err 1747 } 1748 devices.dataLoopFile = data 1749 devices.dataDevice = dataFile.Name() 1750 } else { 1751 dataFile, err = os.OpenFile(devices.dataDevice, os.O_RDWR, 0600) 1752 if err != nil { 1753 return err 1754 } 1755 } 1756 defer dataFile.Close() 1757 1758 if devices.metadataDevice == "" { 1759 // Make sure the sparse images exist in <root>/devicemapper/metadata 1760 1761 hasMetadata := devices.hasImage("metadata") 1762 1763 if !doInit && !hasMetadata { 1764 return errors.New("Loopback metadata file not found") 1765 } 1766 1767 if !hasMetadata { 1768 createdLoopback = true 1769 } 1770 1771 metadata, err := devices.ensureImage("metadata", devices.metaDataLoopbackSize) 1772 if err != nil { 1773 logrus.Debugf("devmapper: Error device ensureImage (metadata): %s", err) 1774 return err 1775 } 1776 1777 metadataFile, err = loopback.AttachLoopDevice(metadata) 1778 if err != nil { 1779 return err 1780 } 1781 devices.metadataLoopFile = metadata 1782 devices.metadataDevice = metadataFile.Name() 1783 } else { 1784 metadataFile, err = os.OpenFile(devices.metadataDevice, os.O_RDWR, 0600) 1785 if err != nil { 1786 return err 1787 } 1788 } 1789 defer metadataFile.Close() 1790 1791 if err := devicemapper.CreatePool(devices.getPoolName(), dataFile, metadataFile, devices.thinpBlockSize); err != nil { 1792 
return err 1793 } 1794 } 1795 1796 // Pool already exists and caller did not pass us a pool. That means 1797 // we probably created pool earlier and could not remove it as some 1798 // containers were still using it. Detect some of the properties of 1799 // pool, like is it using loop devices. 1800 if poolExists && devices.thinPoolDevice == "" { 1801 if err := devices.loadThinPoolLoopBackInfo(); err != nil { 1802 logrus.Debugf("devmapper: Failed to load thin pool loopback device information:%v", err) 1803 return err 1804 } 1805 } 1806 1807 // If we didn't just create the data or metadata image, we need to 1808 // load the transaction id and migrate old metadata 1809 if !createdLoopback { 1810 if err := devices.initMetaData(); err != nil { 1811 return err 1812 } 1813 } 1814 1815 if devices.thinPoolDevice == "" { 1816 if devices.metadataLoopFile != "" || devices.dataLoopFile != "" { 1817 logrus.Warnf("devmapper: Usage of loopback devices is strongly discouraged for production use. Please use `--storage-opt dm.thinpooldev` or use `man docker` to refer to dm.thinpooldev section.") 1818 } 1819 } 1820 1821 // Right now this loads only NextDeviceID. If there is more metadata 1822 // down the line, we might have to move it earlier. 1823 if err := devices.loadDeviceSetMetaData(); err != nil { 1824 return err 1825 } 1826 1827 // Setup the base image 1828 if doInit { 1829 if err := devices.setupBaseImage(); err != nil { 1830 logrus.Debugf("devmapper: Error device setupBaseImage: %s", err) 1831 return err 1832 } 1833 } 1834 1835 return nil 1836 } 1837 1838 // AddDevice adds a device and registers in the hash. 1839 func (devices *DeviceSet) AddDevice(hash, baseHash string) error { 1840 logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s)", hash, baseHash) 1841 defer logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s) END", hash, baseHash) 1842 1843 // If a deleted device exists, return error. 
1844 baseInfo, err := devices.lookupDeviceWithLock(baseHash) 1845 if err != nil { 1846 return err 1847 } 1848 1849 if baseInfo.Deleted { 1850 return fmt.Errorf("devmapper: Base device %v has been marked for deferred deletion", baseInfo.Hash) 1851 } 1852 1853 baseInfo.lock.Lock() 1854 defer baseInfo.lock.Unlock() 1855 1856 devices.Lock() 1857 defer devices.Unlock() 1858 1859 // Also include deleted devices in case hash of new device is 1860 // same as one of the deleted devices. 1861 if info, _ := devices.lookupDevice(hash); info != nil { 1862 return fmt.Errorf("devmapper: device %s already exists. Deleted=%v", hash, info.Deleted) 1863 } 1864 1865 if err := devices.createRegisterSnapDevice(hash, baseInfo); err != nil { 1866 return err 1867 } 1868 1869 return nil 1870 } 1871 1872 func (devices *DeviceSet) markForDeferredDeletion(info *devInfo) error { 1873 // If device is already in deleted state, there is nothing to be done. 1874 if info.Deleted { 1875 return nil 1876 } 1877 1878 logrus.Debugf("devmapper: Marking device %s for deferred deletion.", info.Hash) 1879 1880 info.Deleted = true 1881 1882 // save device metadata to reflect deleted state. 1883 if err := devices.saveMetadata(info); err != nil { 1884 info.Deleted = false 1885 return err 1886 } 1887 1888 devices.nrDeletedDevices++ 1889 return nil 1890 } 1891 1892 // Should be called with devices.Lock() held. 1893 func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) error { 1894 if err := devices.openTransaction(info.Hash, info.DeviceID); err != nil { 1895 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceId = %d", "", info.DeviceID) 1896 return err 1897 } 1898 1899 defer devices.closeTransaction() 1900 1901 err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID) 1902 if err != nil { 1903 // If syncDelete is true, we want to return error. If deferred 1904 // deletion is not enabled, we return an error. 
If error is 1905 // something other then EBUSY, return an error. 1906 if syncDelete || !devices.deferredDelete || err != devicemapper.ErrBusy { 1907 logrus.Debugf("devmapper: Error deleting device: %s", err) 1908 return err 1909 } 1910 } 1911 1912 if err == nil { 1913 if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil { 1914 return err 1915 } 1916 // If device was already in deferred delete state that means 1917 // deletion was being tried again later. Reduce the deleted 1918 // device count. 1919 if info.Deleted { 1920 devices.nrDeletedDevices-- 1921 } 1922 devices.markDeviceIDFree(info.DeviceID) 1923 } else { 1924 if err := devices.markForDeferredDeletion(info); err != nil { 1925 return err 1926 } 1927 } 1928 1929 return nil 1930 } 1931 1932 // Issue discard only if device open count is zero. 1933 func (devices *DeviceSet) issueDiscard(info *devInfo) error { 1934 logrus.Debugf("devmapper: issueDiscard(device: %s). START", info.Hash) 1935 defer logrus.Debugf("devmapper: issueDiscard(device: %s). END", info.Hash) 1936 // This is a workaround for the kernel not discarding block so 1937 // on the thin pool when we remove a thinp device, so we do it 1938 // manually. 1939 // Even if device is deferred deleted, activate it and issue 1940 // discards. 1941 if err := devices.activateDeviceIfNeeded(info, true); err != nil { 1942 return err 1943 } 1944 1945 devinfo, err := devicemapper.GetInfo(info.Name()) 1946 if err != nil { 1947 return err 1948 } 1949 1950 if devinfo.OpenCount != 0 { 1951 logrus.Debugf("devmapper: Device: %s is in use. OpenCount=%d. Not issuing discards.", info.Hash, devinfo.OpenCount) 1952 return nil 1953 } 1954 1955 if err := devicemapper.BlockDeviceDiscard(info.DevName()); err != nil { 1956 logrus.Debugf("devmapper: Error discarding block on device: %s (ignoring)", err) 1957 } 1958 return nil 1959 } 1960 1961 // Should be called with devices.Lock() held. 
func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error {
	// Discards are best-effort: the result of issueDiscard is ignored.
	if devices.doBlkDiscard {
		devices.issueDiscard(info)
	}

	// Try to deactivate device in case it is active.
	if err := devices.deactivateDevice(info); err != nil {
		logrus.Debugf("devmapper: Error deactivating device: %s", err)
		return err
	}

	if err := devices.deleteTransaction(info, syncDelete); err != nil {
		return err
	}

	return nil
}

// DeleteDevice deletes the device identified by hash.
//
// DeleteDevice will return success if device has been marked for deferred
// removal. If one wants to override that and want DeleteDevice() to fail if
// device was busy and could not be deleted, set syncDelete=true.
func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error {
	logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) START", hash, syncDelete)
	defer logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) END", hash, syncDelete)
	info, err := devices.lookupDeviceWithLock(hash)
	if err != nil {
		return err
	}

	// Per-device lock first, then the global lock.
	info.lock.Lock()
	defer info.lock.Unlock()

	devices.Lock()
	defer devices.Unlock()

	return devices.deleteDevice(info, syncDelete)
}

// deactivatePool removes the pool device if it exists. After a successful
// removal it queries the dependencies of the device name and logs a warning
// when that query succeeds.
func (devices *DeviceSet) deactivatePool() error {
	logrus.Debugf("devmapper: deactivatePool()")
	defer logrus.Debugf("devmapper: deactivatePool END")
	devname := devices.getPoolDevName()

	devinfo, err := devicemapper.GetInfo(devname)
	if err != nil {
		return err
	}

	// Nothing to do when the pool device is not there.
	if devinfo.Exists == 0 {
		return nil
	}
	if err := devicemapper.RemoveDevice(devname); err != nil {
		return err
	}

	if d, err := devicemapper.GetDeps(devname); err == nil {
		logrus.Warnf("devmapper: device %s still has %d active dependents", devname, d.Count)
	}

	return nil
}

// deactivateDevice removes the device-mapper device for info if it exists,
// using deferred removal when that is enabled and a retrying synchronous
// removal otherwise.
func (devices *DeviceSet) deactivateDevice(info *devInfo) error {
	logrus.Debugf("devmapper: deactivateDevice(%s)", info.Hash)
	defer logrus.Debugf("devmapper: deactivateDevice END(%s)", info.Hash)

	devinfo, err := devicemapper.GetInfo(info.Name())
	if err != nil {
		return err
	}

	// The device is not active; nothing to remove.
	if devinfo.Exists == 0 {
		return nil
	}

	if devices.deferredRemove {
		if err := devicemapper.RemoveDeviceDeferred(info.Name()); err != nil {
			return err
		}
	} else {
		if err := devices.removeDevice(info.Name()); err != nil {
			return err
		}
	}
	return nil
}

// Issues the underlying dm remove operation.
//
// A busy device is retried up to 200 times, 100ms apart. The global
// DeviceSet lock is released while sleeping and re-acquired afterwards, so
// the caller must hold it. Any error other than ErrBusy is returned
// immediately; if the device is still busy after the last attempt, ErrBusy
// is returned.
func (devices *DeviceSet) removeDevice(devname string) error {
	var err error

	logrus.Debugf("devmapper: removeDevice START(%s)", devname)
	defer logrus.Debugf("devmapper: removeDevice END(%s)", devname)

	for i := 0; i < 200; i++ {
		err = devicemapper.RemoveDevice(devname)
		if err == nil {
			break
		}
		if err != devicemapper.ErrBusy {
			return err
		}

		// If we see EBUSY it may be a transient error,
		// sleep a bit and retry a few times.
		devices.Unlock()
		time.Sleep(100 * time.Millisecond)
		devices.Lock()
	}

	return err
}

// cancelDeferredRemoval cancels a pending deferred removal of the device for
// info. It is a no-op when deferred removal is not enabled or when the
// device reports no deferred removal.
//
// A busy device is retried up to 100 times, 100ms apart. The global
// DeviceSet lock is released while sleeping and re-acquired afterwards, so
// the caller must hold it. ErrEnxio is treated as success.
func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error {
	if !devices.deferredRemove {
		return nil
	}

	logrus.Debugf("devmapper: cancelDeferredRemoval START(%s)", info.Name())
	defer logrus.Debugf("devmapper: cancelDeferredRemoval END(%s)", info.Name())

	// NOTE(review): the error returned here is never inspected; it is
	// overwritten in the loop below. Confirm whether a failed lookup
	// should abort instead of proceeding to the cancel attempts.
	devinfo, err := devicemapper.GetInfoWithDeferred(info.Name())

	if devinfo != nil && devinfo.DeferredRemove == 0 {
		return nil
	}

	// Cancel deferred remove
	for i := 0; i < 100; i++ {
		err = devicemapper.CancelDeferredRemove(info.Name())
		if err == nil {
			break
		}

		if err == devicemapper.ErrEnxio {
			// Device is probably already gone. Return success.
			return nil
		}

		if err != devicemapper.ErrBusy {
			return err
		}

		// If we see EBUSY it may be a transient error,
		// sleep a bit and retry a few times.
		devices.Unlock()
		time.Sleep(100 * time.Millisecond)
		devices.Lock()
	}
	return err
}

// Shutdown shuts down the device by unmounting the root.
func (devices *DeviceSet) Shutdown(home string) error {
	logrus.Debugf("devmapper: [deviceset %s] Shutdown()", devices.devicePrefix)
	logrus.Debugf("devmapper: Shutting down DeviceSet: %s", devices.root)
	defer logrus.Debugf("devmapper: [deviceset %s] Shutdown() END", devices.devicePrefix)

	// Stop deletion worker. This should stop delivering new events to
	// ticker channel. That means no new instance of cleanupDeletedDevice()
	// will run after this call. If one instance is already running at
	// the time of the call, it must be holding devices.Lock() and
	// we will block on this lock till cleanup function exits.
	devices.deletionWorkerTicker.Stop()

	devices.Lock()
	// Save DeviceSet Metadata first. Docker kills all threads if they
	// don't finish in certain time. It is possible that Shutdown()
	// routine does not finish in time as we loop trying to deactivate
	// some devices while these are busy. In that case shutdown() routine
	// will be killed and we will not get a chance to save deviceset
	// metadata. Hence save this early before trying to deactivate devices.
2134 devices.saveDeviceSetMetaData() 2135 2136 // ignore the error since it's just a best effort to not try to unmount something that's mounted 2137 mounts, _ := mount.GetMounts() 2138 mounted := make(map[string]bool, len(mounts)) 2139 for _, mnt := range mounts { 2140 mounted[mnt.Mountpoint] = true 2141 } 2142 2143 if err := filepath.Walk(path.Join(home, "mnt"), func(p string, info os.FileInfo, err error) error { 2144 if err != nil { 2145 return err 2146 } 2147 if !info.IsDir() { 2148 return nil 2149 } 2150 2151 if mounted[p] { 2152 // We use MNT_DETACH here in case it is still busy in some running 2153 // container. This means it'll go away from the global scope directly, 2154 // and the device will be released when that container dies. 2155 if err := syscall.Unmount(p, syscall.MNT_DETACH); err != nil { 2156 logrus.Debugf("devmapper: Shutdown unmounting %s, error: %s", p, err) 2157 } 2158 } 2159 2160 if devInfo, err := devices.lookupDevice(path.Base(p)); err != nil { 2161 logrus.Debugf("devmapper: Shutdown lookup device %s, error: %s", path.Base(p), err) 2162 } else { 2163 if err := devices.deactivateDevice(devInfo); err != nil { 2164 logrus.Debugf("devmapper: Shutdown deactivate %s , error: %s", devInfo.Hash, err) 2165 } 2166 } 2167 2168 return nil 2169 }); err != nil && !os.IsNotExist(err) { 2170 devices.Unlock() 2171 return err 2172 } 2173 2174 devices.Unlock() 2175 2176 info, _ := devices.lookupDeviceWithLock("") 2177 if info != nil { 2178 info.lock.Lock() 2179 devices.Lock() 2180 if err := devices.deactivateDevice(info); err != nil { 2181 logrus.Debugf("devmapper: Shutdown deactivate base , error: %s", err) 2182 } 2183 devices.Unlock() 2184 info.lock.Unlock() 2185 } 2186 2187 devices.Lock() 2188 if devices.thinPoolDevice == "" { 2189 if err := devices.deactivatePool(); err != nil { 2190 logrus.Debugf("devmapper: Shutdown deactivate pool , error: %s", err) 2191 } 2192 } 2193 devices.Unlock() 2194 2195 return nil 2196 } 2197 2198 // MountDevice mounts the 
device if not already mounted. 2199 func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error { 2200 info, err := devices.lookupDeviceWithLock(hash) 2201 if err != nil { 2202 return err 2203 } 2204 2205 if info.Deleted { 2206 return fmt.Errorf("devmapper: Can't mount device %v as it has been marked for deferred deletion", info.Hash) 2207 } 2208 2209 info.lock.Lock() 2210 defer info.lock.Unlock() 2211 2212 devices.Lock() 2213 defer devices.Unlock() 2214 2215 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2216 return fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2217 } 2218 2219 fstype, err := ProbeFsType(info.DevName()) 2220 if err != nil { 2221 return err 2222 } 2223 2224 options := "" 2225 2226 if fstype == "xfs" { 2227 // XFS needs nouuid or it can't mount filesystems with the same fs 2228 options = joinMountOptions(options, "nouuid") 2229 } 2230 2231 options = joinMountOptions(options, devices.mountOptions) 2232 options = joinMountOptions(options, label.FormatMountLabel("", mountLabel)) 2233 2234 if err := mount.Mount(info.DevName(), path, fstype, options); err != nil { 2235 return fmt.Errorf("devmapper: Error mounting '%s' on '%s': %s", info.DevName(), path, err) 2236 } 2237 2238 return nil 2239 } 2240 2241 // UnmountDevice unmounts the device and removes it from hash. 
2242 func (devices *DeviceSet) UnmountDevice(hash, mountPath string) error { 2243 logrus.Debugf("devmapper: UnmountDevice(hash=%s)", hash) 2244 defer logrus.Debugf("devmapper: UnmountDevice(hash=%s) END", hash) 2245 2246 info, err := devices.lookupDeviceWithLock(hash) 2247 if err != nil { 2248 return err 2249 } 2250 2251 info.lock.Lock() 2252 defer info.lock.Unlock() 2253 2254 devices.Lock() 2255 defer devices.Unlock() 2256 2257 logrus.Debugf("devmapper: Unmount(%s)", mountPath) 2258 if err := syscall.Unmount(mountPath, syscall.MNT_DETACH); err != nil { 2259 return err 2260 } 2261 logrus.Debugf("devmapper: Unmount done") 2262 2263 if err := devices.deactivateDevice(info); err != nil { 2264 return err 2265 } 2266 2267 return nil 2268 } 2269 2270 // HasDevice returns true if the device metadata exists. 2271 func (devices *DeviceSet) HasDevice(hash string) bool { 2272 info, _ := devices.lookupDeviceWithLock(hash) 2273 return info != nil 2274 } 2275 2276 // List returns a list of device ids. 
2277 func (devices *DeviceSet) List() []string { 2278 devices.Lock() 2279 defer devices.Unlock() 2280 2281 ids := make([]string, len(devices.Devices)) 2282 i := 0 2283 for k := range devices.Devices { 2284 ids[i] = k 2285 i++ 2286 } 2287 return ids 2288 } 2289 2290 func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) { 2291 var params string 2292 _, sizeInSectors, _, params, err = devicemapper.GetStatus(devName) 2293 if err != nil { 2294 return 2295 } 2296 if _, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector); err == nil { 2297 return 2298 } 2299 return 2300 } 2301 2302 // GetDeviceStatus provides size, mapped sectors 2303 func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) { 2304 info, err := devices.lookupDeviceWithLock(hash) 2305 if err != nil { 2306 return nil, err 2307 } 2308 2309 info.lock.Lock() 2310 defer info.lock.Unlock() 2311 2312 devices.Lock() 2313 defer devices.Unlock() 2314 2315 status := &DevStatus{ 2316 DeviceID: info.DeviceID, 2317 Size: info.Size, 2318 TransactionID: info.TransactionID, 2319 } 2320 2321 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2322 return nil, fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2323 } 2324 2325 sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName()) 2326 2327 if err != nil { 2328 return nil, err 2329 } 2330 2331 status.SizeInSectors = sizeInSectors 2332 status.MappedSectors = mappedSectors 2333 status.HighestMappedSector = highestMappedSector 2334 2335 return status, nil 2336 } 2337 2338 func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionID, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) { 2339 var params string 2340 if _, totalSizeInSectors, _, params, err = devicemapper.GetStatus(devices.getPoolName()); err == nil { 2341 _, err = fmt.Sscanf(params, "%d %d/%d 
%d/%d", &transactionID, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal) 2342 } 2343 return 2344 } 2345 2346 // DataDevicePath returns the path to the data storage for this deviceset, 2347 // regardless of loopback or block device 2348 func (devices *DeviceSet) DataDevicePath() string { 2349 return devices.dataDevice 2350 } 2351 2352 // MetadataDevicePath returns the path to the metadata storage for this deviceset, 2353 // regardless of loopback or block device 2354 func (devices *DeviceSet) MetadataDevicePath() string { 2355 return devices.metadataDevice 2356 } 2357 2358 func (devices *DeviceSet) getUnderlyingAvailableSpace(loopFile string) (uint64, error) { 2359 buf := new(syscall.Statfs_t) 2360 if err := syscall.Statfs(loopFile, buf); err != nil { 2361 logrus.Warnf("devmapper: Couldn't stat loopfile filesystem %v: %v", loopFile, err) 2362 return 0, err 2363 } 2364 return buf.Bfree * uint64(buf.Bsize), nil 2365 } 2366 2367 func (devices *DeviceSet) isRealFile(loopFile string) (bool, error) { 2368 if loopFile != "" { 2369 fi, err := os.Stat(loopFile) 2370 if err != nil { 2371 logrus.Warnf("devmapper: Couldn't stat loopfile %v: %v", loopFile, err) 2372 return false, err 2373 } 2374 return fi.Mode().IsRegular(), nil 2375 } 2376 return false, nil 2377 } 2378 2379 // Status returns the current status of this deviceset 2380 func (devices *DeviceSet) Status() *Status { 2381 devices.Lock() 2382 defer devices.Unlock() 2383 2384 status := &Status{} 2385 2386 status.PoolName = devices.getPoolName() 2387 status.DataFile = devices.DataDevicePath() 2388 status.DataLoopback = devices.dataLoopFile 2389 status.MetadataFile = devices.MetadataDevicePath() 2390 status.MetadataLoopback = devices.metadataLoopFile 2391 status.UdevSyncSupported = devicemapper.UdevSyncSupported() 2392 status.DeferredRemoveEnabled = devices.deferredRemove 2393 status.DeferredDeleteEnabled = devices.deferredDelete 2394 status.DeferredDeletedDeviceCount = devices.nrDeletedDevices 2395 
status.BaseDeviceSize = devices.getBaseDeviceSize() 2396 status.BaseDeviceFS = devices.getBaseDeviceFS() 2397 2398 totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 2399 if err == nil { 2400 // Convert from blocks to bytes 2401 blockSizeInSectors := totalSizeInSectors / dataTotal 2402 2403 status.Data.Used = dataUsed * blockSizeInSectors * 512 2404 status.Data.Total = dataTotal * blockSizeInSectors * 512 2405 status.Data.Available = status.Data.Total - status.Data.Used 2406 2407 // metadata blocks are always 4k 2408 status.Metadata.Used = metadataUsed * 4096 2409 status.Metadata.Total = metadataTotal * 4096 2410 status.Metadata.Available = status.Metadata.Total - status.Metadata.Used 2411 2412 status.SectorSize = blockSizeInSectors * 512 2413 2414 if check, _ := devices.isRealFile(devices.dataLoopFile); check { 2415 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.dataLoopFile) 2416 if err == nil && actualSpace < status.Data.Available { 2417 status.Data.Available = actualSpace 2418 } 2419 } 2420 2421 if check, _ := devices.isRealFile(devices.metadataLoopFile); check { 2422 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.metadataLoopFile) 2423 if err == nil && actualSpace < status.Metadata.Available { 2424 status.Metadata.Available = actualSpace 2425 } 2426 } 2427 } 2428 2429 return status 2430 } 2431 2432 // Status returns the current status of this deviceset 2433 func (devices *DeviceSet) exportDeviceMetadata(hash string) (*deviceMetadata, error) { 2434 info, err := devices.lookupDeviceWithLock(hash) 2435 if err != nil { 2436 return nil, err 2437 } 2438 2439 info.lock.Lock() 2440 defer info.lock.Unlock() 2441 2442 metadata := &deviceMetadata{info.DeviceID, info.Size, info.Name()} 2443 return metadata, nil 2444 } 2445 2446 // NewDeviceSet creates the device set based on the options provided. 
2447 func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps []idtools.IDMap) (*DeviceSet, error) { 2448 devicemapper.SetDevDir("/dev") 2449 2450 devices := &DeviceSet{ 2451 root: root, 2452 metaData: metaData{Devices: make(map[string]*devInfo)}, 2453 dataLoopbackSize: defaultDataLoopbackSize, 2454 metaDataLoopbackSize: defaultMetaDataLoopbackSize, 2455 baseFsSize: defaultBaseFsSize, 2456 overrideUdevSyncCheck: defaultUdevSyncOverride, 2457 doBlkDiscard: true, 2458 thinpBlockSize: defaultThinpBlockSize, 2459 deviceIDMap: make([]byte, deviceIDMapSz), 2460 deletionWorkerTicker: time.NewTicker(time.Second * 30), 2461 uidMaps: uidMaps, 2462 gidMaps: gidMaps, 2463 minFreeSpacePercent: defaultMinFreeSpacePercent, 2464 } 2465 2466 foundBlkDiscard := false 2467 for _, option := range options { 2468 key, val, err := parsers.ParseKeyValueOpt(option) 2469 if err != nil { 2470 return nil, err 2471 } 2472 key = strings.ToLower(key) 2473 switch key { 2474 case "dm.basesize": 2475 size, err := units.RAMInBytes(val) 2476 if err != nil { 2477 return nil, err 2478 } 2479 userBaseSize = true 2480 devices.baseFsSize = uint64(size) 2481 case "dm.loopdatasize": 2482 size, err := units.RAMInBytes(val) 2483 if err != nil { 2484 return nil, err 2485 } 2486 devices.dataLoopbackSize = size 2487 case "dm.loopmetadatasize": 2488 size, err := units.RAMInBytes(val) 2489 if err != nil { 2490 return nil, err 2491 } 2492 devices.metaDataLoopbackSize = size 2493 case "dm.fs": 2494 if val != "ext4" && val != "xfs" { 2495 return nil, fmt.Errorf("devmapper: Unsupported filesystem %s\n", val) 2496 } 2497 devices.filesystem = val 2498 case "dm.mkfsarg": 2499 devices.mkfsArgs = append(devices.mkfsArgs, val) 2500 case "dm.mountopt": 2501 devices.mountOptions = joinMountOptions(devices.mountOptions, val) 2502 case "dm.metadatadev": 2503 devices.metadataDevice = val 2504 case "dm.datadev": 2505 devices.dataDevice = val 2506 case "dm.thinpooldev": 2507 devices.thinPoolDevice = 
strings.TrimPrefix(val, "/dev/mapper/") 2508 case "dm.blkdiscard": 2509 foundBlkDiscard = true 2510 devices.doBlkDiscard, err = strconv.ParseBool(val) 2511 if err != nil { 2512 return nil, err 2513 } 2514 case "dm.blocksize": 2515 size, err := units.RAMInBytes(val) 2516 if err != nil { 2517 return nil, err 2518 } 2519 // convert to 512b sectors 2520 devices.thinpBlockSize = uint32(size) >> 9 2521 case "dm.override_udev_sync_check": 2522 devices.overrideUdevSyncCheck, err = strconv.ParseBool(val) 2523 if err != nil { 2524 return nil, err 2525 } 2526 2527 case "dm.use_deferred_removal": 2528 enableDeferredRemoval, err = strconv.ParseBool(val) 2529 if err != nil { 2530 return nil, err 2531 } 2532 2533 case "dm.use_deferred_deletion": 2534 enableDeferredDeletion, err = strconv.ParseBool(val) 2535 if err != nil { 2536 return nil, err 2537 } 2538 2539 case "dm.min_free_space": 2540 if !strings.HasSuffix(val, "%") { 2541 return nil, fmt.Errorf("devmapper: Option dm.min_free_space requires %% suffix") 2542 } 2543 2544 valstring := strings.TrimSuffix(val, "%") 2545 minFreeSpacePercent, err := strconv.ParseUint(valstring, 10, 32) 2546 if err != nil { 2547 return nil, err 2548 } 2549 2550 if minFreeSpacePercent >= 100 { 2551 return nil, fmt.Errorf("devmapper: Invalid value %v for option dm.min_free_space", val) 2552 } 2553 2554 devices.minFreeSpacePercent = uint32(minFreeSpacePercent) 2555 default: 2556 return nil, fmt.Errorf("devmapper: Unknown option %s\n", key) 2557 } 2558 } 2559 2560 // By default, don't do blk discard hack on raw devices, its rarely useful and is expensive 2561 if !foundBlkDiscard && (devices.dataDevice != "" || devices.thinPoolDevice != "") { 2562 devices.doBlkDiscard = false 2563 } 2564 2565 if err := devices.initDevmapper(doInit); err != nil { 2566 return nil, err 2567 } 2568 2569 return devices, nil 2570 }