github.com/brahmaroutu/docker@v1.2.1-0.20160809185609-eb28dde01f16/daemon/graphdriver/devmapper/deviceset.go (about) 1 // +build linux 2 3 package devmapper 4 5 import ( 6 "bufio" 7 "encoding/json" 8 "errors" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "os" 13 "os/exec" 14 "path" 15 "path/filepath" 16 "strconv" 17 "strings" 18 "sync" 19 "syscall" 20 "time" 21 22 "github.com/Sirupsen/logrus" 23 24 "github.com/docker/docker/daemon/graphdriver" 25 "github.com/docker/docker/dockerversion" 26 "github.com/docker/docker/pkg/devicemapper" 27 "github.com/docker/docker/pkg/idtools" 28 "github.com/docker/docker/pkg/loopback" 29 "github.com/docker/docker/pkg/mount" 30 "github.com/docker/docker/pkg/parsers" 31 "github.com/docker/go-units" 32 33 "github.com/opencontainers/runc/libcontainer/label" 34 ) 35 36 var ( 37 defaultDataLoopbackSize int64 = 100 * 1024 * 1024 * 1024 38 defaultMetaDataLoopbackSize int64 = 2 * 1024 * 1024 * 1024 39 defaultBaseFsSize uint64 = 10 * 1024 * 1024 * 1024 40 defaultThinpBlockSize uint32 = 128 // 64K = 128 512b sectors 41 defaultUdevSyncOverride = false 42 maxDeviceID = 0xffffff // 24 bit, pool limit 43 deviceIDMapSz = (maxDeviceID + 1) / 8 44 // We retry device removal so many a times that even error messages 45 // will fill up console during normal operation. So only log Fatal 46 // messages by default. 47 logLevel = devicemapper.LogLevelFatal 48 driverDeferredRemovalSupport = false 49 enableDeferredRemoval = false 50 enableDeferredDeletion = false 51 userBaseSize = false 52 defaultMinFreeSpacePercent uint32 = 10 53 ) 54 55 const deviceSetMetaFile string = "deviceset-metadata" 56 const transactionMetaFile string = "transaction-metadata" 57 58 type transaction struct { 59 OpenTransactionID uint64 `json:"open_transaction_id"` 60 DeviceIDHash string `json:"device_hash"` 61 DeviceID int `json:"device_id"` 62 } 63 64 type devInfo struct { 65 Hash string `json:"-"` 66 DeviceID int `json:"device_id"` 67 Size uint64 `json:"size"` 68 TransactionID uint64 `json:"transaction_id"` 69 Initialized bool `json:"initialized"` 70 Deleted bool `json:"deleted"` 71 devices *DeviceSet 72 73 // The global DeviceSet lock guarantees that we serialize all 74 // the calls to libdevmapper (which is not threadsafe), but we 75 // sometimes release that lock while sleeping. In that case 76 // this per-device lock is still held, protecting against 77 // other accesses to the device that we're doing the wait on. 78 // 79 // WARNING: In order to avoid AB-BA deadlocks when releasing 80 // the global lock while holding the per-device locks all 81 // device locks must be acquired *before* the device lock, and 82 // multiple device locks should be acquired parent before child. 83 lock sync.Mutex 84 } 85 86 type metaData struct { 87 Devices map[string]*devInfo `json:"Devices"` 88 } 89 90 // DeviceSet holds information about list of devices 91 type DeviceSet struct { 92 metaData `json:"-"` 93 sync.Mutex `json:"-"` // Protects all fields of DeviceSet and serializes calls into libdevmapper 94 root string 95 devicePrefix string 96 TransactionID uint64 `json:"-"` 97 NextDeviceID int `json:"next_device_id"` 98 deviceIDMap []byte 99 100 // Options 101 dataLoopbackSize int64 102 metaDataLoopbackSize int64 103 baseFsSize uint64 104 filesystem string 105 mountOptions string 106 mkfsArgs []string 107 dataDevice string // block or loop dev 108 dataLoopFile string // loopback file, if used 109 metadataDevice string // block or loop dev 110 metadataLoopFile string // loopback file, if used 111 doBlkDiscard bool 112 thinpBlockSize uint32 113 thinPoolDevice string 114 transaction `json:"-"` 115 overrideUdevSyncCheck bool 116 deferredRemove bool // use deferred removal 117 deferredDelete bool // use deferred deletion 118 BaseDeviceUUID string // save UUID of base device 119 BaseDeviceFilesystem string // save filesystem of base device 120 nrDeletedDevices uint // number of deleted devices 121 deletionWorkerTicker *time.Ticker 122 uidMaps []idtools.IDMap 123 gidMaps []idtools.IDMap 124 minFreeSpacePercent uint32 //min free space percentage in thinpool 125 } 126 127 // DiskUsage contains information about disk usage and is used when reporting Status of a device. 128 type DiskUsage struct { 129 // Used bytes on the disk. 130 Used uint64 131 // Total bytes on the disk. 132 Total uint64 133 // Available bytes on the disk. 134 Available uint64 135 } 136 137 // Status returns the information about the device. 138 type Status struct { 139 // PoolName is the name of the data pool. 140 PoolName string 141 // DataFile is the actual block device for data. 142 DataFile string 143 // DataLoopback loopback file, if used. 144 DataLoopback string 145 // MetadataFile is the actual block device for metadata. 146 MetadataFile string 147 // MetadataLoopback is the loopback file, if used. 148 MetadataLoopback string 149 // Data is the disk used for data. 150 Data DiskUsage 151 // Metadata is the disk used for meta data. 152 Metadata DiskUsage 153 // BaseDeviceSize is base size of container and image 154 BaseDeviceSize uint64 155 // BaseDeviceFS is backing filesystem. 156 BaseDeviceFS string 157 // SectorSize size of the vector. 158 SectorSize uint64 159 // UdevSyncSupported is true if sync is supported. 160 UdevSyncSupported bool 161 // DeferredRemoveEnabled is true then the device is not unmounted. 162 DeferredRemoveEnabled bool 163 // True if deferred deletion is enabled. This is different from 164 // deferred removal. "removal" means that device mapper device is 165 // deactivated. Thin device is still in thin pool and can be activated 166 // again. But "deletion" means that thin device will be deleted from 167 // thin pool and it can't be activated again. 168 DeferredDeleteEnabled bool 169 DeferredDeletedDeviceCount uint 170 MinFreeSpace uint64 171 } 172 173 // Structure used to export image/container metadata in docker inspect. 174 type deviceMetadata struct { 175 deviceID int 176 deviceSize uint64 // size in bytes 177 deviceName string // Device name as used during activation 178 } 179 180 // DevStatus returns information about device mounted containing its id, size and sector information. 181 type DevStatus struct { 182 // DeviceID is the id of the device. 183 DeviceID int 184 // Size is the size of the filesystem. 185 Size uint64 186 // TransactionID is a unique integer per device set used to identify an operation on the file system, this number is incremental. 187 TransactionID uint64 188 // SizeInSectors indicates the size of the sectors allocated. 189 SizeInSectors uint64 190 // MappedSectors indicates number of mapped sectors. 191 MappedSectors uint64 192 // HighestMappedSector is the pointer to the highest mapped sector. 193 HighestMappedSector uint64 194 } 195 196 func getDevName(name string) string { 197 return "/dev/mapper/" + name 198 } 199 200 func (info *devInfo) Name() string { 201 hash := info.Hash 202 if hash == "" { 203 hash = "base" 204 } 205 return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash) 206 } 207 208 func (info *devInfo) DevName() string { 209 return getDevName(info.Name()) 210 } 211 212 func (devices *DeviceSet) loopbackDir() string { 213 return path.Join(devices.root, "devicemapper") 214 } 215 216 func (devices *DeviceSet) metadataDir() string { 217 return path.Join(devices.root, "metadata") 218 } 219 220 func (devices *DeviceSet) metadataFile(info *devInfo) string { 221 file := info.Hash 222 if file == "" { 223 file = "base" 224 } 225 return path.Join(devices.metadataDir(), file) 226 } 227 228 func (devices *DeviceSet) transactionMetaFile() string { 229 return path.Join(devices.metadataDir(), transactionMetaFile) 230 } 231 232 func (devices *DeviceSet) deviceSetMetaFile() string { 233 return path.Join(devices.metadataDir(), deviceSetMetaFile) 234 } 235 236 func (devices *DeviceSet) oldMetadataFile() string { 237 return path.Join(devices.loopbackDir(), "json") 238 } 239 240 func (devices *DeviceSet) getPoolName() string { 241 if devices.thinPoolDevice == "" { 242 return devices.devicePrefix + "-pool" 243 } 244 return devices.thinPoolDevice 245 } 246 247 func (devices *DeviceSet) getPoolDevName() string { 248 return getDevName(devices.getPoolName()) 249 } 250 251 func (devices *DeviceSet) hasImage(name string) bool { 252 dirname := devices.loopbackDir() 253 filename := path.Join(dirname, name) 254 255 _, err := os.Stat(filename) 256 return err == nil 257 } 258 259 // ensureImage creates a sparse file of <size> bytes at the path 260 // <root>/devicemapper/<name>. 261 // If the file already exists and new size is larger than its current size, it grows to the new size. 262 // Either way it returns the full path. 263 func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) { 264 dirname := devices.loopbackDir() 265 filename := path.Join(dirname, name) 266 267 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 268 if err != nil { 269 return "", err 270 } 271 if err := idtools.MkdirAllAs(dirname, 0700, uid, gid); err != nil && !os.IsExist(err) { 272 return "", err 273 } 274 275 if fi, err := os.Stat(filename); err != nil { 276 if !os.IsNotExist(err) { 277 return "", err 278 } 279 logrus.Debugf("devmapper: Creating loopback file %s for device-manage use", filename) 280 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 281 if err != nil { 282 return "", err 283 } 284 defer file.Close() 285 286 if err := file.Truncate(size); err != nil { 287 return "", err 288 } 289 } else { 290 if fi.Size() < size { 291 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 292 if err != nil { 293 return "", err 294 } 295 defer file.Close() 296 if err := file.Truncate(size); err != nil { 297 return "", fmt.Errorf("devmapper: Unable to grow loopback file %s: %v", filename, err) 298 } 299 } else if fi.Size() > size { 300 logrus.Warnf("devmapper: Can't shrink loopback file %s", filename) 301 } 302 } 303 return filename, nil 304 } 305 306 func (devices *DeviceSet) allocateTransactionID() uint64 { 307 devices.OpenTransactionID = devices.TransactionID + 1 308 return devices.OpenTransactionID 309 } 310 311 func (devices *DeviceSet) updatePoolTransactionID() error { 312 if err := devicemapper.SetTransactionID(devices.getPoolDevName(), devices.TransactionID, devices.OpenTransactionID); err != nil { 313 return fmt.Errorf("devmapper: Error setting devmapper transaction ID: %s", err) 314 } 315 devices.TransactionID = devices.OpenTransactionID 316 return nil 317 } 318 319 func (devices *DeviceSet) removeMetadata(info *devInfo) error { 320 if err := os.RemoveAll(devices.metadataFile(info)); err != nil { 321 return fmt.Errorf("devmapper: Error removing metadata file %s: %s", devices.metadataFile(info), err) 322 } 323 return nil 324 } 325 326 // Given json data and file path, write it to disk 327 func (devices *DeviceSet) writeMetaFile(jsonData []byte, filePath string) error { 328 tmpFile, err := ioutil.TempFile(devices.metadataDir(), ".tmp") 329 if err != nil { 330 return fmt.Errorf("devmapper: Error creating metadata file: %s", err) 331 } 332 333 n, err := tmpFile.Write(jsonData) 334 if err != nil { 335 return fmt.Errorf("devmapper: Error writing metadata to %s: %s", tmpFile.Name(), err) 336 } 337 if n < len(jsonData) { 338 return io.ErrShortWrite 339 } 340 if err := tmpFile.Sync(); err != nil { 341 return fmt.Errorf("devmapper: Error syncing metadata file %s: %s", tmpFile.Name(), err) 342 } 343 if err := tmpFile.Close(); err != nil { 344 return fmt.Errorf("devmapper: Error closing metadata file %s: %s", tmpFile.Name(), err) 345 } 346 if err := os.Rename(tmpFile.Name(), filePath); err != nil { 347 return fmt.Errorf("devmapper: Error committing metadata file %s: %s", tmpFile.Name(), err) 348 } 349 350 return nil 351 } 352 353 func (devices *DeviceSet) saveMetadata(info *devInfo) error { 354 jsonData, err := json.Marshal(info) 355 if err != nil { 356 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 357 } 358 if err := devices.writeMetaFile(jsonData, devices.metadataFile(info)); err != nil { 359 return err 360 } 361 return nil 362 } 363 364 func (devices *DeviceSet) markDeviceIDUsed(deviceID int) { 365 var mask byte 366 i := deviceID % 8 367 mask = 1 << uint(i) 368 devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] | mask 369 } 370 371 func (devices *DeviceSet) markDeviceIDFree(deviceID int) { 372 var mask byte 373 i := deviceID % 8 374 mask = ^(1 << uint(i)) 375 devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] & mask 376 } 377 378 func (devices *DeviceSet) isDeviceIDFree(deviceID int) bool { 379 var mask byte 380 i := deviceID % 8 381 mask = (1 << uint(i)) 382 if (devices.deviceIDMap[deviceID/8] & mask) != 0 { 383 return false 384 } 385 return true 386 } 387 388 // Should be called with devices.Lock() held. 389 func (devices *DeviceSet) lookupDevice(hash string) (*devInfo, error) { 390 info := devices.Devices[hash] 391 if info == nil { 392 info = devices.loadMetadata(hash) 393 if info == nil { 394 return nil, fmt.Errorf("devmapper: Unknown device %s", hash) 395 } 396 397 devices.Devices[hash] = info 398 } 399 return info, nil 400 } 401 402 func (devices *DeviceSet) lookupDeviceWithLock(hash string) (*devInfo, error) { 403 devices.Lock() 404 defer devices.Unlock() 405 info, err := devices.lookupDevice(hash) 406 return info, err 407 } 408 409 // This function relies on that device hash map has been loaded in advance. 410 // Should be called with devices.Lock() held. 411 func (devices *DeviceSet) constructDeviceIDMap() { 412 logrus.Debugf("devmapper: constructDeviceIDMap()") 413 defer logrus.Debugf("devmapper: constructDeviceIDMap() END") 414 415 for _, info := range devices.Devices { 416 devices.markDeviceIDUsed(info.DeviceID) 417 logrus.Debugf("devmapper: Added deviceId=%d to DeviceIdMap", info.DeviceID) 418 } 419 } 420 421 func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo) error { 422 423 // Skip some of the meta files which are not device files. 424 if strings.HasSuffix(finfo.Name(), ".migrated") { 425 logrus.Debugf("devmapper: Skipping file %s", path) 426 return nil 427 } 428 429 if strings.HasPrefix(finfo.Name(), ".") { 430 logrus.Debugf("devmapper: Skipping file %s", path) 431 return nil 432 } 433 434 if finfo.Name() == deviceSetMetaFile { 435 logrus.Debugf("devmapper: Skipping file %s", path) 436 return nil 437 } 438 439 if finfo.Name() == transactionMetaFile { 440 logrus.Debugf("devmapper: Skipping file %s", path) 441 return nil 442 } 443 444 logrus.Debugf("devmapper: Loading data for file %s", path) 445 446 hash := finfo.Name() 447 if hash == "base" { 448 hash = "" 449 } 450 451 // Include deleted devices also as cleanup delete device logic 452 // will go through it and see if there are any deleted devices. 453 if _, err := devices.lookupDevice(hash); err != nil { 454 return fmt.Errorf("devmapper: Error looking up device %s:%v", hash, err) 455 } 456 457 return nil 458 } 459 460 func (devices *DeviceSet) loadDeviceFilesOnStart() error { 461 logrus.Debugf("devmapper: loadDeviceFilesOnStart()") 462 defer logrus.Debugf("devmapper: loadDeviceFilesOnStart() END") 463 464 var scan = func(path string, info os.FileInfo, err error) error { 465 if err != nil { 466 logrus.Debugf("devmapper: Can't walk the file %s", path) 467 return nil 468 } 469 470 // Skip any directories 471 if info.IsDir() { 472 return nil 473 } 474 475 return devices.deviceFileWalkFunction(path, info) 476 } 477 478 return filepath.Walk(devices.metadataDir(), scan) 479 } 480 481 // Should be called with devices.Lock() held. 482 func (devices *DeviceSet) unregisterDevice(id int, hash string) error { 483 logrus.Debugf("devmapper: unregisterDevice(%v, %v)", id, hash) 484 info := &devInfo{ 485 Hash: hash, 486 DeviceID: id, 487 } 488 489 delete(devices.Devices, hash) 490 491 if err := devices.removeMetadata(info); err != nil { 492 logrus.Debugf("devmapper: Error removing metadata: %s", err) 493 return err 494 } 495 496 return nil 497 } 498 499 // Should be called with devices.Lock() held. 500 func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, transactionID uint64) (*devInfo, error) { 501 logrus.Debugf("devmapper: registerDevice(%v, %v)", id, hash) 502 info := &devInfo{ 503 Hash: hash, 504 DeviceID: id, 505 Size: size, 506 TransactionID: transactionID, 507 Initialized: false, 508 devices: devices, 509 } 510 511 devices.Devices[hash] = info 512 513 if err := devices.saveMetadata(info); err != nil { 514 // Try to remove unused device 515 delete(devices.Devices, hash) 516 return nil, err 517 } 518 519 return info, nil 520 } 521 522 func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bool) error { 523 logrus.Debugf("devmapper: activateDeviceIfNeeded(%v)", info.Hash) 524 525 if info.Deleted && !ignoreDeleted { 526 return fmt.Errorf("devmapper: Can't activate device %v as it is marked for deletion", info.Hash) 527 } 528 529 // Make sure deferred removal on device is canceled, if one was 530 // scheduled. 531 if err := devices.cancelDeferredRemovalIfNeeded(info); err != nil { 532 return fmt.Errorf("devmapper: Device Deferred Removal Cancellation Failed: %s", err) 533 } 534 535 if devinfo, _ := devicemapper.GetInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 { 536 return nil 537 } 538 539 return devicemapper.ActivateDevice(devices.getPoolDevName(), info.Name(), info.DeviceID, info.Size) 540 } 541 542 // Return true only if kernel supports xfs and mkfs.xfs is available 543 func xfsSupported() bool { 544 // Make sure mkfs.xfs is available 545 if _, err := exec.LookPath("mkfs.xfs"); err != nil { 546 return false 547 } 548 549 // Check if kernel supports xfs filesystem or not. 550 exec.Command("modprobe", "xfs").Run() 551 552 f, err := os.Open("/proc/filesystems") 553 if err != nil { 554 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 555 return false 556 } 557 defer f.Close() 558 559 s := bufio.NewScanner(f) 560 for s.Scan() { 561 if strings.HasSuffix(s.Text(), "\txfs") { 562 return true 563 } 564 } 565 566 if err := s.Err(); err != nil { 567 logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err) 568 } 569 return false 570 } 571 572 func determineDefaultFS() string { 573 if xfsSupported() { 574 return "xfs" 575 } 576 577 logrus.Warn("devmapper: XFS is not supported in your system. Either the kernel doesn't support it or mkfs.xfs is not in your PATH. Defaulting to ext4 filesystem") 578 return "ext4" 579 } 580 581 func (devices *DeviceSet) createFilesystem(info *devInfo) (err error) { 582 devname := info.DevName() 583 584 args := []string{} 585 for _, arg := range devices.mkfsArgs { 586 args = append(args, arg) 587 } 588 589 args = append(args, devname) 590 591 if devices.filesystem == "" { 592 devices.filesystem = determineDefaultFS() 593 } 594 if err := devices.saveBaseDeviceFilesystem(devices.filesystem); err != nil { 595 return err 596 } 597 598 logrus.Infof("devmapper: Creating filesystem %s on device %s", devices.filesystem, info.Name()) 599 defer func() { 600 if err != nil { 601 logrus.Infof("devmapper: Error while creating filesystem %s on device %s: %v", devices.filesystem, info.Name(), err) 602 } else { 603 logrus.Infof("devmapper: Successfully created filesystem %s on device %s", devices.filesystem, info.Name()) 604 } 605 }() 606 607 switch devices.filesystem { 608 case "xfs": 609 err = exec.Command("mkfs.xfs", args...).Run() 610 case "ext4": 611 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0,lazy_journal_init=0"}, args...)...).Run() 612 if err != nil { 613 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0"}, args...)...).Run() 614 } 615 if err != nil { 616 return err 617 } 618 err = exec.Command("tune2fs", append([]string{"-c", "-1", "-i", "0"}, devname)...).Run() 619 default: 620 err = fmt.Errorf("devmapper: Unsupported filesystem type %s", devices.filesystem) 621 } 622 return 623 } 624 625 func (devices *DeviceSet) migrateOldMetaData() error { 626 // Migrate old metadata file 627 jsonData, err := ioutil.ReadFile(devices.oldMetadataFile()) 628 if err != nil && !os.IsNotExist(err) { 629 return err 630 } 631 632 if jsonData != nil { 633 m := metaData{Devices: make(map[string]*devInfo)} 634 635 if err := json.Unmarshal(jsonData, &m); err != nil { 636 return err 637 } 638 639 for hash, info := range m.Devices { 640 info.Hash = hash 641 devices.saveMetadata(info) 642 } 643 if err := os.Rename(devices.oldMetadataFile(), devices.oldMetadataFile()+".migrated"); err != nil { 644 return err 645 } 646 647 } 648 649 return nil 650 } 651 652 // Cleanup deleted devices. It assumes that all the devices have been 653 // loaded in the hash table. 654 func (devices *DeviceSet) cleanupDeletedDevices() error { 655 devices.Lock() 656 657 // If there are no deleted devices, there is nothing to do. 658 if devices.nrDeletedDevices == 0 { 659 devices.Unlock() 660 return nil 661 } 662 663 var deletedDevices []*devInfo 664 665 for _, info := range devices.Devices { 666 if !info.Deleted { 667 continue 668 } 669 logrus.Debugf("devmapper: Found deleted device %s.", info.Hash) 670 deletedDevices = append(deletedDevices, info) 671 } 672 673 // Delete the deleted devices. DeleteDevice() first takes the info lock 674 // and then devices.Lock(). So drop it to avoid deadlock. 675 devices.Unlock() 676 677 for _, info := range deletedDevices { 678 // This will again try deferred deletion. 679 if err := devices.DeleteDevice(info.Hash, false); err != nil { 680 logrus.Warnf("devmapper: Deletion of device %s, device_id=%v failed:%v", info.Hash, info.DeviceID, err) 681 } 682 } 683 684 return nil 685 } 686 687 func (devices *DeviceSet) countDeletedDevices() { 688 for _, info := range devices.Devices { 689 if !info.Deleted { 690 continue 691 } 692 devices.nrDeletedDevices++ 693 } 694 } 695 696 func (devices *DeviceSet) startDeviceDeletionWorker() { 697 // Deferred deletion is not enabled. Don't do anything. 698 if !devices.deferredDelete { 699 return 700 } 701 702 logrus.Debug("devmapper: Worker to cleanup deleted devices started") 703 for range devices.deletionWorkerTicker.C { 704 devices.cleanupDeletedDevices() 705 } 706 } 707 708 func (devices *DeviceSet) initMetaData() error { 709 devices.Lock() 710 defer devices.Unlock() 711 712 if err := devices.migrateOldMetaData(); err != nil { 713 return err 714 } 715 716 _, transactionID, _, _, _, _, err := devices.poolStatus() 717 if err != nil { 718 return err 719 } 720 721 devices.TransactionID = transactionID 722 723 if err := devices.loadDeviceFilesOnStart(); err != nil { 724 return fmt.Errorf("devmapper: Failed to load device files:%v", err) 725 } 726 727 devices.constructDeviceIDMap() 728 devices.countDeletedDevices() 729 730 if err := devices.processPendingTransaction(); err != nil { 731 return err 732 } 733 734 // Start a goroutine to cleanup Deleted Devices 735 go devices.startDeviceDeletionWorker() 736 return nil 737 } 738 739 func (devices *DeviceSet) incNextDeviceID() { 740 // IDs are 24bit, so wrap around 741 devices.NextDeviceID = (devices.NextDeviceID + 1) & maxDeviceID 742 } 743 744 func (devices *DeviceSet) getNextFreeDeviceID() (int, error) { 745 devices.incNextDeviceID() 746 for i := 0; i <= maxDeviceID; i++ { 747 if devices.isDeviceIDFree(devices.NextDeviceID) { 748 devices.markDeviceIDUsed(devices.NextDeviceID) 749 return devices.NextDeviceID, nil 750 } 751 devices.incNextDeviceID() 752 } 753 754 return 0, fmt.Errorf("devmapper: Unable to find a free device ID") 755 } 756 757 func (devices *DeviceSet) poolHasFreeSpace() error { 758 if devices.minFreeSpacePercent == 0 { 759 return nil 760 } 761 762 _, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 763 if err != nil { 764 return err 765 } 766 767 minFreeData := (dataTotal * uint64(devices.minFreeSpacePercent)) / 100 768 if minFreeData < 1 { 769 minFreeData = 1 770 } 771 dataFree := dataTotal - dataUsed 772 if dataFree < minFreeData { 773 return fmt.Errorf("devmapper: Thin Pool has %v free data blocks which is less than minimum required %v free data blocks. Create more free space in thin pool or use dm.min_free_space option to change behavior", (dataTotal - dataUsed), minFreeData) 774 } 775 776 minFreeMetadata := (metadataTotal * uint64(devices.minFreeSpacePercent)) / 100 777 if minFreeMetadata < 1 { 778 minFreeMetadata = 1 779 } 780 781 metadataFree := metadataTotal - metadataUsed 782 if metadataFree < minFreeMetadata { 783 return fmt.Errorf("devmapper: Thin Pool has %v free metadata blocks which is less than minimum required %v free metadata blocks. Create more free metadata space in thin pool or use dm.min_free_space option to change behavior", (metadataTotal - metadataUsed), minFreeMetadata) 784 } 785 786 return nil 787 } 788 789 func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) { 790 devices.Lock() 791 defer devices.Unlock() 792 793 deviceID, err := devices.getNextFreeDeviceID() 794 if err != nil { 795 return nil, err 796 } 797 798 if err := devices.openTransaction(hash, deviceID); err != nil { 799 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 800 devices.markDeviceIDFree(deviceID) 801 return nil, err 802 } 803 804 for { 805 if err := devicemapper.CreateDevice(devices.getPoolDevName(), deviceID); err != nil { 806 if devicemapper.DeviceIDExists(err) { 807 // Device ID already exists. This should not 808 // happen. Now we have a mechanism to find 809 // a free device ID. So something is not right. 810 // Give a warning and continue. 811 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 812 deviceID, err = devices.getNextFreeDeviceID() 813 if err != nil { 814 return nil, err 815 } 816 // Save new device id into transaction 817 devices.refreshTransaction(deviceID) 818 continue 819 } 820 logrus.Debugf("devmapper: Error creating device: %s", err) 821 devices.markDeviceIDFree(deviceID) 822 return nil, err 823 } 824 break 825 } 826 827 logrus.Debugf("devmapper: Registering device (id %v) with FS size %v", deviceID, devices.baseFsSize) 828 info, err := devices.registerDevice(deviceID, hash, devices.baseFsSize, devices.OpenTransactionID) 829 if err != nil { 830 _ = devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 831 devices.markDeviceIDFree(deviceID) 832 return nil, err 833 } 834 835 if err := devices.closeTransaction(); err != nil { 836 devices.unregisterDevice(deviceID, hash) 837 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 838 devices.markDeviceIDFree(deviceID) 839 return nil, err 840 } 841 return info, nil 842 } 843 844 func (devices *DeviceSet) takeSnapshot(hash string, baseInfo *devInfo, size uint64) error { 845 var ( 846 devinfo *devicemapper.Info 847 err error 848 ) 849 850 if err = devices.poolHasFreeSpace(); err != nil { 851 return err 852 } 853 854 if devices.deferredRemove { 855 devinfo, err = devicemapper.GetInfoWithDeferred(baseInfo.Name()) 856 if err != nil { 857 return err 858 } 859 if devinfo != nil && devinfo.DeferredRemove != 0 { 860 err = devices.cancelDeferredRemoval(baseInfo) 861 if err != nil { 862 // If Error is ErrEnxio. Device is probably already gone. Continue. 863 if err != devicemapper.ErrEnxio { 864 return err 865 } 866 } else { 867 defer devices.deactivateDevice(baseInfo) 868 } 869 } 870 } else { 871 devinfo, err = devicemapper.GetInfo(baseInfo.Name()) 872 if err != nil { 873 return err 874 } 875 } 876 877 doSuspend := devinfo != nil && devinfo.Exists != 0 878 879 if doSuspend { 880 if err = devicemapper.SuspendDevice(baseInfo.Name()); err != nil { 881 return err 882 } 883 defer devicemapper.ResumeDevice(baseInfo.Name()) 884 } 885 886 if err = devices.createRegisterSnapDevice(hash, baseInfo, size); err != nil { 887 return err 888 } 889 890 return nil 891 } 892 893 func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo, size uint64) error { 894 deviceID, err := devices.getNextFreeDeviceID() 895 if err != nil { 896 return err 897 } 898 899 if err := devices.openTransaction(hash, deviceID); err != nil { 900 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceID = %d", hash, deviceID) 901 devices.markDeviceIDFree(deviceID) 902 return err 903 } 904 905 for { 906 if err := devicemapper.CreateSnapDeviceRaw(devices.getPoolDevName(), deviceID, baseInfo.DeviceID); err != nil { 907 if devicemapper.DeviceIDExists(err) { 908 // Device ID already exists. This should not 909 // happen. Now we have a mechanism to find 910 // a free device ID. So something is not right. 911 // Give a warning and continue. 912 logrus.Errorf("devmapper: Device ID %d exists in pool but it is supposed to be unused", deviceID) 913 deviceID, err = devices.getNextFreeDeviceID() 914 if err != nil { 915 return err 916 } 917 // Save new device id into transaction 918 devices.refreshTransaction(deviceID) 919 continue 920 } 921 logrus.Debugf("devmapper: Error creating snap device: %s", err) 922 devices.markDeviceIDFree(deviceID) 923 return err 924 } 925 break 926 } 927 928 if _, err := devices.registerDevice(deviceID, hash, size, devices.OpenTransactionID); err != nil { 929 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 930 devices.markDeviceIDFree(deviceID) 931 logrus.Debugf("devmapper: Error registering device: %s", err) 932 return err 933 } 934 935 if err := devices.closeTransaction(); err != nil { 936 devices.unregisterDevice(deviceID, hash) 937 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 938 devices.markDeviceIDFree(deviceID) 939 return err 940 } 941 return nil 942 } 943 944 func (devices *DeviceSet) loadMetadata(hash string) *devInfo { 945 info := &devInfo{Hash: hash, devices: devices} 946 947 jsonData, err := ioutil.ReadFile(devices.metadataFile(info)) 948 if err != nil { 949 logrus.Debugf("devmapper: Failed to read %s with err: %v", devices.metadataFile(info), err) 950 return nil 951 } 952 953 if err := json.Unmarshal(jsonData, &info); err != nil { 954 logrus.Debugf("devmapper: Failed to unmarshal devInfo from %s with err: %v", devices.metadataFile(info), err) 955 return nil 956 } 957 958 if info.DeviceID > maxDeviceID { 959 logrus.Errorf("devmapper: Ignoring Invalid DeviceId=%d", info.DeviceID) 960 return nil 961 } 962 963 return info 964 } 965 966 func getDeviceUUID(device string) (string, error) { 967 out, err := exec.Command("blkid", "-s", "UUID", "-o", "value", device).Output() 968 if err != nil { 969 return "", fmt.Errorf("devmapper: Failed to find uuid for device %s:%v", device, err) 970 } 971 972 uuid := strings.TrimSuffix(string(out), "\n") 973 uuid = strings.TrimSpace(uuid) 974 logrus.Debugf("devmapper: UUID for device: %s is:%s", device, uuid) 975 return uuid, nil 976 } 977 978 func (devices *DeviceSet) getBaseDeviceSize() uint64 { 979 info, _ := devices.lookupDevice("") 980 if info == nil { 981 return 0 982 } 983 return info.Size 984 } 985 986 func (devices *DeviceSet) getBaseDeviceFS() string { 987 return devices.BaseDeviceFilesystem 988 } 989 990 func (devices *DeviceSet) verifyBaseDeviceUUIDFS(baseInfo *devInfo) error { 991 devices.Lock() 992 defer devices.Unlock() 993 994 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 995 return err 996 } 997 defer devices.deactivateDevice(baseInfo) 998 999 uuid, err := getDeviceUUID(baseInfo.DevName()) 1000 if err != nil { 1001 return err 1002 } 1003 1004 if devices.BaseDeviceUUID != uuid { 1005 return fmt.Errorf("devmapper: Current Base Device UUID:%s does not match with stored UUID:%s. Possibly using a different thin pool than last invocation", uuid, devices.BaseDeviceUUID) 1006 } 1007 1008 if devices.BaseDeviceFilesystem == "" { 1009 fsType, err := ProbeFsType(baseInfo.DevName()) 1010 if err != nil { 1011 return err 1012 } 1013 if err := devices.saveBaseDeviceFilesystem(fsType); err != nil { 1014 return err 1015 } 1016 } 1017 1018 // If user specified a filesystem using dm.fs option and current 1019 // file system of base image is not same, warn user that dm.fs 1020 // will be ignored. 1021 if devices.BaseDeviceFilesystem != devices.filesystem { 1022 logrus.Warnf("devmapper: Base device already exists and has filesystem %s on it. User specified filesystem %s will be ignored.", devices.BaseDeviceFilesystem, devices.filesystem) 1023 devices.filesystem = devices.BaseDeviceFilesystem 1024 } 1025 return nil 1026 } 1027 1028 func (devices *DeviceSet) saveBaseDeviceFilesystem(fs string) error { 1029 devices.BaseDeviceFilesystem = fs 1030 return devices.saveDeviceSetMetaData() 1031 } 1032 1033 func (devices *DeviceSet) saveBaseDeviceUUID(baseInfo *devInfo) error { 1034 devices.Lock() 1035 defer devices.Unlock() 1036 1037 if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil { 1038 return err 1039 } 1040 defer devices.deactivateDevice(baseInfo) 1041 1042 uuid, err := getDeviceUUID(baseInfo.DevName()) 1043 if err != nil { 1044 return err 1045 } 1046 1047 devices.BaseDeviceUUID = uuid 1048 return devices.saveDeviceSetMetaData() 1049 } 1050 1051 func (devices *DeviceSet) createBaseImage() error { 1052 logrus.Debug("devmapper: Initializing base device-mapper thin volume") 1053 1054 // Create initial device 1055 info, err := devices.createRegisterDevice("") 1056 if err != nil { 1057 return err 1058 } 1059 1060 logrus.Debug("devmapper: Creating filesystem on base device-mapper thin volume") 1061 1062 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 1063 return err 1064 } 1065 1066 if err := devices.createFilesystem(info); err != nil { 1067 return err 1068 } 1069 1070 info.Initialized = true 1071 if err := devices.saveMetadata(info); err != nil { 1072 info.Initialized = false 1073 return err 1074 } 1075 1076 if err := devices.saveBaseDeviceUUID(info); err != nil { 1077 return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err) 1078 } 1079 1080 return nil 1081 } 1082 1083 // Returns if thin pool device exists or not. If device exists, also makes 1084 // sure it is a thin pool device and not some other type of device. 1085 func (devices *DeviceSet) thinPoolExists(thinPoolDevice string) (bool, error) { 1086 logrus.Debugf("devmapper: Checking for existence of the pool %s", thinPoolDevice) 1087 1088 info, err := devicemapper.GetInfo(thinPoolDevice) 1089 if err != nil { 1090 return false, fmt.Errorf("devmapper: GetInfo() on device %s failed: %v", thinPoolDevice, err) 1091 } 1092 1093 // Device does not exist. 1094 if info.Exists == 0 { 1095 return false, nil 1096 } 1097 1098 _, _, deviceType, _, err := devicemapper.GetStatus(thinPoolDevice) 1099 if err != nil { 1100 return false, fmt.Errorf("devmapper: GetStatus() on device %s failed: %v", thinPoolDevice, err) 1101 } 1102 1103 if deviceType != "thin-pool" { 1104 return false, fmt.Errorf("devmapper: Device %s is not a thin pool", thinPoolDevice) 1105 } 1106 1107 return true, nil 1108 } 1109 1110 func (devices *DeviceSet) checkThinPool() error { 1111 _, transactionID, dataUsed, _, _, _, err := devices.poolStatus() 1112 if err != nil { 1113 return err 1114 } 1115 if dataUsed != 0 { 1116 return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) that already has used data blocks", 1117 devices.thinPoolDevice) 1118 } 1119 if transactionID != 0 { 1120 return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) with non-zero transaction ID", 1121 devices.thinPoolDevice) 1122 } 1123 return nil 1124 } 1125 1126 // Base image is initialized properly. Either save UUID for first time (for 1127 // upgrade case or verify UUID. 1128 func (devices *DeviceSet) setupVerifyBaseImageUUIDFS(baseInfo *devInfo) error { 1129 // If BaseDeviceUUID is nil (upgrade case), save it and return success. 1130 if devices.BaseDeviceUUID == "" { 1131 if err := devices.saveBaseDeviceUUID(baseInfo); err != nil { 1132 return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err) 1133 } 1134 return nil 1135 } 1136 1137 if err := devices.verifyBaseDeviceUUIDFS(baseInfo); err != nil { 1138 return fmt.Errorf("devmapper: Base Device UUID and Filesystem verification failed: %v", err) 1139 } 1140 1141 return nil 1142 } 1143 1144 func (devices *DeviceSet) checkGrowBaseDeviceFS(info *devInfo) error { 1145 1146 if !userBaseSize { 1147 return nil 1148 } 1149 1150 if devices.baseFsSize < devices.getBaseDeviceSize() { 1151 return fmt.Errorf("devmapper: Base device size cannot be smaller than %s", units.HumanSize(float64(devices.getBaseDeviceSize()))) 1152 } 1153 1154 if devices.baseFsSize == devices.getBaseDeviceSize() { 1155 return nil 1156 } 1157 1158 info.lock.Lock() 1159 defer info.lock.Unlock() 1160 1161 devices.Lock() 1162 defer devices.Unlock() 1163 1164 info.Size = devices.baseFsSize 1165 1166 if err := devices.saveMetadata(info); err != nil { 1167 // Try to remove unused device 1168 delete(devices.Devices, info.Hash) 1169 return err 1170 } 1171 1172 return devices.growFS(info) 1173 } 1174 1175 func (devices *DeviceSet) growFS(info *devInfo) error { 1176 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 1177 return fmt.Errorf("Error activating devmapper device: %s", err) 1178 } 1179 1180 defer devices.deactivateDevice(info) 1181 1182 fsMountPoint := "/run/docker/mnt" 1183 if _, err := os.Stat(fsMountPoint); os.IsNotExist(err) { 1184 if err := os.MkdirAll(fsMountPoint, 0700); err != nil { 1185 return err 1186 } 1187 defer os.RemoveAll(fsMountPoint) 1188 } 1189 1190 options := "" 1191 if devices.BaseDeviceFilesystem == "xfs" { 1192 // XFS needs nouuid or it can't mount filesystems with the same fs 1193 options = joinMountOptions(options, "nouuid") 1194 } 1195 options = joinMountOptions(options, devices.mountOptions) 1196 1197 if err := mount.Mount(info.DevName(), fsMountPoint, devices.BaseDeviceFilesystem, options); err != nil { 1198 return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), fsMountPoint, err) 1199 } 1200 1201 defer syscall.Unmount(fsMountPoint, syscall.MNT_DETACH) 1202 1203 switch devices.BaseDeviceFilesystem { 1204 case "ext4": 1205 if out, err := exec.Command("resize2fs", info.DevName()).CombinedOutput(); err != nil { 1206 return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out)) 1207 } 1208 case "xfs": 1209 if out, err := exec.Command("xfs_growfs", info.DevName()).CombinedOutput(); err != nil { 1210 return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out)) 1211 } 1212 default: 1213 return fmt.Errorf("Unsupported filesystem type %s", devices.BaseDeviceFilesystem) 1214 } 1215 return nil 1216 } 1217 1218 func (devices *DeviceSet) setupBaseImage() error { 1219 oldInfo, _ := devices.lookupDeviceWithLock("") 1220 1221 // base image already exists. If it is initialized properly, do UUID 1222 // verification and return. Otherwise remove image and set it up 1223 // fresh. 1224 1225 if oldInfo != nil { 1226 if oldInfo.Initialized && !oldInfo.Deleted { 1227 if err := devices.setupVerifyBaseImageUUIDFS(oldInfo); err != nil { 1228 return err 1229 } 1230 1231 if err := devices.checkGrowBaseDeviceFS(oldInfo); err != nil { 1232 return err 1233 } 1234 1235 return nil 1236 } 1237 1238 logrus.Debug("devmapper: Removing uninitialized base image") 1239 // If previous base device is in deferred delete state, 1240 // that needs to be cleaned up first. So don't try 1241 // deferred deletion. 1242 if err := devices.DeleteDevice("", true); err != nil { 1243 return err 1244 } 1245 } 1246 1247 // If we are setting up base image for the first time, make sure 1248 // thin pool is empty. 1249 if devices.thinPoolDevice != "" && oldInfo == nil { 1250 if err := devices.checkThinPool(); err != nil { 1251 return err 1252 } 1253 } 1254 1255 // Create new base image device 1256 if err := devices.createBaseImage(); err != nil { 1257 return err 1258 } 1259 1260 return nil 1261 } 1262 1263 func setCloseOnExec(name string) { 1264 if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil { 1265 for _, i := range fileInfos { 1266 link, _ := os.Readlink(filepath.Join("/proc/self/fd", i.Name())) 1267 if link == name { 1268 fd, err := strconv.Atoi(i.Name()) 1269 if err == nil { 1270 syscall.CloseOnExec(fd) 1271 } 1272 } 1273 } 1274 } 1275 } 1276 1277 // DMLog implements logging using DevMapperLogger interface. 1278 func (devices *DeviceSet) DMLog(level int, file string, line int, dmError int, message string) { 1279 // By default libdm sends us all the messages including debug ones. 1280 // We need to filter out messages here and figure out which one 1281 // should be printed. 1282 if level > logLevel { 1283 return 1284 } 1285 1286 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 1287 if level <= devicemapper.LogLevelErr { 1288 logrus.Errorf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1289 } else if level <= devicemapper.LogLevelInfo { 1290 logrus.Infof("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1291 } else { 1292 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 1293 logrus.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 1294 } 1295 } 1296 1297 func major(device uint64) uint64 { 1298 return (device >> 8) & 0xfff 1299 } 1300 1301 func minor(device uint64) uint64 { 1302 return (device & 0xff) | ((device >> 12) & 0xfff00) 1303 } 1304 1305 // ResizePool increases the size of the pool. 1306 func (devices *DeviceSet) ResizePool(size int64) error { 1307 dirname := devices.loopbackDir() 1308 datafilename := path.Join(dirname, "data") 1309 if len(devices.dataDevice) > 0 { 1310 datafilename = devices.dataDevice 1311 } 1312 metadatafilename := path.Join(dirname, "metadata") 1313 if len(devices.metadataDevice) > 0 { 1314 metadatafilename = devices.metadataDevice 1315 } 1316 1317 datafile, err := os.OpenFile(datafilename, os.O_RDWR, 0) 1318 if datafile == nil { 1319 return err 1320 } 1321 defer datafile.Close() 1322 1323 fi, err := datafile.Stat() 1324 if fi == nil { 1325 return err 1326 } 1327 1328 if fi.Size() > size { 1329 return fmt.Errorf("devmapper: Can't shrink file") 1330 } 1331 1332 dataloopback := loopback.FindLoopDeviceFor(datafile) 1333 if dataloopback == nil { 1334 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", datafilename) 1335 } 1336 defer dataloopback.Close() 1337 1338 metadatafile, err := os.OpenFile(metadatafilename, os.O_RDWR, 0) 1339 if metadatafile == nil { 1340 return err 1341 } 1342 defer metadatafile.Close() 1343 1344 metadataloopback := loopback.FindLoopDeviceFor(metadatafile) 1345 if metadataloopback == nil { 1346 return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", metadatafilename) 1347 } 1348 defer metadataloopback.Close() 1349 1350 // Grow loopback file 1351 if err := datafile.Truncate(size); err != nil { 1352 return fmt.Errorf("devmapper: Unable to grow loopback file: %s", err) 1353 } 1354 1355 // Reload size for loopback device 1356 if err := loopback.SetCapacity(dataloopback); err != nil { 1357 return fmt.Errorf("Unable to update loopback capacity: %s", err) 1358 } 1359 1360 // Suspend the pool 1361 if err := devicemapper.SuspendDevice(devices.getPoolName()); err != nil { 1362 return fmt.Errorf("devmapper: Unable to suspend pool: %s", err) 1363 } 1364 1365 // Reload with the new block sizes 1366 if err := devicemapper.ReloadPool(devices.getPoolName(), dataloopback, metadataloopback, devices.thinpBlockSize); err != nil { 1367 return fmt.Errorf("devmapper: Unable to reload pool: %s", err) 1368 } 1369 1370 // Resume the pool 1371 if err := devicemapper.ResumeDevice(devices.getPoolName()); err != nil { 1372 return fmt.Errorf("devmapper: Unable to resume pool: %s", err) 1373 } 1374 1375 return nil 1376 } 1377 1378 func (devices *DeviceSet) loadTransactionMetaData() error { 1379 jsonData, err := ioutil.ReadFile(devices.transactionMetaFile()) 1380 if err != nil { 1381 // There is no active transaction. This will be the case 1382 // during upgrade. 1383 if os.IsNotExist(err) { 1384 devices.OpenTransactionID = devices.TransactionID 1385 return nil 1386 } 1387 return err 1388 } 1389 1390 json.Unmarshal(jsonData, &devices.transaction) 1391 return nil 1392 } 1393 1394 func (devices *DeviceSet) saveTransactionMetaData() error { 1395 jsonData, err := json.Marshal(&devices.transaction) 1396 if err != nil { 1397 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1398 } 1399 1400 return devices.writeMetaFile(jsonData, devices.transactionMetaFile()) 1401 } 1402 1403 func (devices *DeviceSet) removeTransactionMetaData() error { 1404 if err := os.RemoveAll(devices.transactionMetaFile()); err != nil { 1405 return err 1406 } 1407 return nil 1408 } 1409 1410 func (devices *DeviceSet) rollbackTransaction() error { 1411 logrus.Debugf("devmapper: Rolling back open transaction: TransactionID=%d hash=%s device_id=%d", devices.OpenTransactionID, devices.DeviceIDHash, devices.DeviceID) 1412 1413 // A device id might have already been deleted before transaction 1414 // closed. In that case this call will fail. Just leave a message 1415 // in case of failure. 1416 if err := devicemapper.DeleteDevice(devices.getPoolDevName(), devices.DeviceID); err != nil { 1417 logrus.Errorf("devmapper: Unable to delete device: %s", err) 1418 } 1419 1420 dinfo := &devInfo{Hash: devices.DeviceIDHash} 1421 if err := devices.removeMetadata(dinfo); err != nil { 1422 logrus.Errorf("devmapper: Unable to remove metadata: %s", err) 1423 } else { 1424 devices.markDeviceIDFree(devices.DeviceID) 1425 } 1426 1427 if err := devices.removeTransactionMetaData(); err != nil { 1428 logrus.Errorf("devmapper: Unable to remove transaction meta file %s: %s", devices.transactionMetaFile(), err) 1429 } 1430 1431 return nil 1432 } 1433 1434 func (devices *DeviceSet) processPendingTransaction() error { 1435 if err := devices.loadTransactionMetaData(); err != nil { 1436 return err 1437 } 1438 1439 // If there was open transaction but pool transaction ID is same 1440 // as open transaction ID, nothing to roll back. 1441 if devices.TransactionID == devices.OpenTransactionID { 1442 return nil 1443 } 1444 1445 // If open transaction ID is less than pool transaction ID, something 1446 // is wrong. Bail out. 1447 if devices.OpenTransactionID < devices.TransactionID { 1448 logrus.Errorf("devmapper: Open Transaction id %d is less than pool transaction id %d", devices.OpenTransactionID, devices.TransactionID) 1449 return nil 1450 } 1451 1452 // Pool transaction ID is not same as open transaction. There is 1453 // a transaction which was not completed. 1454 if err := devices.rollbackTransaction(); err != nil { 1455 return fmt.Errorf("devmapper: Rolling back open transaction failed: %s", err) 1456 } 1457 1458 devices.OpenTransactionID = devices.TransactionID 1459 return nil 1460 } 1461 1462 func (devices *DeviceSet) loadDeviceSetMetaData() error { 1463 jsonData, err := ioutil.ReadFile(devices.deviceSetMetaFile()) 1464 if err != nil { 1465 // For backward compatibility return success if file does 1466 // not exist. 1467 if os.IsNotExist(err) { 1468 return nil 1469 } 1470 return err 1471 } 1472 1473 return json.Unmarshal(jsonData, devices) 1474 } 1475 1476 func (devices *DeviceSet) saveDeviceSetMetaData() error { 1477 jsonData, err := json.Marshal(devices) 1478 if err != nil { 1479 return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err) 1480 } 1481 1482 return devices.writeMetaFile(jsonData, devices.deviceSetMetaFile()) 1483 } 1484 1485 func (devices *DeviceSet) openTransaction(hash string, DeviceID int) error { 1486 devices.allocateTransactionID() 1487 devices.DeviceIDHash = hash 1488 devices.DeviceID = DeviceID 1489 if err := devices.saveTransactionMetaData(); err != nil { 1490 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1491 } 1492 return nil 1493 } 1494 1495 func (devices *DeviceSet) refreshTransaction(DeviceID int) error { 1496 devices.DeviceID = DeviceID 1497 if err := devices.saveTransactionMetaData(); err != nil { 1498 return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err) 1499 } 1500 return nil 1501 } 1502 1503 func (devices *DeviceSet) closeTransaction() error { 1504 if err := devices.updatePoolTransactionID(); err != nil { 1505 logrus.Debug("devmapper: Failed to close Transaction") 1506 return err 1507 } 1508 return nil 1509 } 1510 1511 func determineDriverCapabilities(version string) error { 1512 /* 1513 * Driver version 4.27.0 and greater support deferred activation 1514 * feature. 1515 */ 1516 1517 logrus.Debugf("devicemapper: driver version is %s", version) 1518 1519 versionSplit := strings.Split(version, ".") 1520 major, err := strconv.Atoi(versionSplit[0]) 1521 if err != nil { 1522 return graphdriver.ErrNotSupported 1523 } 1524 1525 if major > 4 { 1526 driverDeferredRemovalSupport = true 1527 return nil 1528 } 1529 1530 if major < 4 { 1531 return nil 1532 } 1533 1534 minor, err := strconv.Atoi(versionSplit[1]) 1535 if err != nil { 1536 return graphdriver.ErrNotSupported 1537 } 1538 1539 /* 1540 * If major is 4 and minor is 27, then there is no need to 1541 * check for patch level as it can not be less than 0. 1542 */ 1543 if minor >= 27 { 1544 driverDeferredRemovalSupport = true 1545 return nil 1546 } 1547 1548 return nil 1549 } 1550 1551 // Determine the major and minor number of loopback device 1552 func getDeviceMajorMinor(file *os.File) (uint64, uint64, error) { 1553 stat, err := file.Stat() 1554 if err != nil { 1555 return 0, 0, err 1556 } 1557 1558 dev := stat.Sys().(*syscall.Stat_t).Rdev 1559 majorNum := major(dev) 1560 minorNum := minor(dev) 1561 1562 logrus.Debugf("devmapper: Major:Minor for device: %s is:%v:%v", file.Name(), majorNum, minorNum) 1563 return majorNum, minorNum, nil 1564 } 1565 1566 // Given a file which is backing file of a loop back device, find the 1567 // loopback device name and its major/minor number. 1568 func getLoopFileDeviceMajMin(filename string) (string, uint64, uint64, error) { 1569 file, err := os.Open(filename) 1570 if err != nil { 1571 logrus.Debugf("devmapper: Failed to open file %s", filename) 1572 return "", 0, 0, err 1573 } 1574 1575 defer file.Close() 1576 loopbackDevice := loopback.FindLoopDeviceFor(file) 1577 if loopbackDevice == nil { 1578 return "", 0, 0, fmt.Errorf("devmapper: Unable to find loopback mount for: %s", filename) 1579 } 1580 defer loopbackDevice.Close() 1581 1582 Major, Minor, err := getDeviceMajorMinor(loopbackDevice) 1583 if err != nil { 1584 return "", 0, 0, err 1585 } 1586 return loopbackDevice.Name(), Major, Minor, nil 1587 } 1588 1589 // Get the major/minor numbers of thin pool data and metadata devices 1590 func (devices *DeviceSet) getThinPoolDataMetaMajMin() (uint64, uint64, uint64, uint64, error) { 1591 var params, poolDataMajMin, poolMetadataMajMin string 1592 1593 _, _, _, params, err := devicemapper.GetTable(devices.getPoolName()) 1594 if err != nil { 1595 return 0, 0, 0, 0, err 1596 } 1597 1598 if _, err = fmt.Sscanf(params, "%s %s", &poolMetadataMajMin, &poolDataMajMin); err != nil { 1599 return 0, 0, 0, 0, err 1600 } 1601 1602 logrus.Debugf("devmapper: poolDataMajMin=%s poolMetaMajMin=%s\n", poolDataMajMin, poolMetadataMajMin) 1603 1604 poolDataMajMinorSplit := strings.Split(poolDataMajMin, ":") 1605 poolDataMajor, err := strconv.ParseUint(poolDataMajMinorSplit[0], 10, 32) 1606 if err != nil { 1607 return 0, 0, 0, 0, err 1608 } 1609 1610 poolDataMinor, err := strconv.ParseUint(poolDataMajMinorSplit[1], 10, 32) 1611 if err != nil { 1612 return 0, 0, 0, 0, err 1613 } 1614 1615 poolMetadataMajMinorSplit := strings.Split(poolMetadataMajMin, ":") 1616 poolMetadataMajor, err := strconv.ParseUint(poolMetadataMajMinorSplit[0], 10, 32) 1617 if err != nil { 1618 return 0, 0, 0, 0, err 1619 } 1620 1621 poolMetadataMinor, err := strconv.ParseUint(poolMetadataMajMinorSplit[1], 10, 32) 1622 if err != nil { 1623 return 0, 0, 0, 0, err 1624 } 1625 1626 return poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, nil 1627 } 1628 1629 func (devices *DeviceSet) loadThinPoolLoopBackInfo() error { 1630 poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, err := devices.getThinPoolDataMetaMajMin() 1631 if err != nil { 1632 return err 1633 } 1634 1635 dirname := devices.loopbackDir() 1636 1637 // data device has not been passed in. So there should be a data file 1638 // which is being mounted as loop device. 1639 if devices.dataDevice == "" { 1640 datafilename := path.Join(dirname, "data") 1641 dataLoopDevice, dataMajor, dataMinor, err := getLoopFileDeviceMajMin(datafilename) 1642 if err != nil { 1643 return err 1644 } 1645 1646 // Compare the two 1647 if poolDataMajor == dataMajor && poolDataMinor == dataMinor { 1648 devices.dataDevice = dataLoopDevice 1649 devices.dataLoopFile = datafilename 1650 } 1651 1652 } 1653 1654 // metadata device has not been passed in. So there should be a 1655 // metadata file which is being mounted as loop device. 1656 if devices.metadataDevice == "" { 1657 metadatafilename := path.Join(dirname, "metadata") 1658 metadataLoopDevice, metadataMajor, metadataMinor, err := getLoopFileDeviceMajMin(metadatafilename) 1659 if err != nil { 1660 return err 1661 } 1662 if poolMetadataMajor == metadataMajor && poolMetadataMinor == metadataMinor { 1663 devices.metadataDevice = metadataLoopDevice 1664 devices.metadataLoopFile = metadatafilename 1665 } 1666 } 1667 1668 return nil 1669 } 1670 1671 func (devices *DeviceSet) enableDeferredRemovalDeletion() error { 1672 1673 // If user asked for deferred removal then check both libdm library 1674 // and kernel driver support deferred removal otherwise error out. 1675 if enableDeferredRemoval { 1676 if !driverDeferredRemovalSupport { 1677 return fmt.Errorf("devmapper: Deferred removal can not be enabled as kernel does not support it") 1678 } 1679 if !devicemapper.LibraryDeferredRemovalSupport { 1680 return fmt.Errorf("devmapper: Deferred removal can not be enabled as libdm does not support it") 1681 } 1682 logrus.Debug("devmapper: Deferred removal support enabled.") 1683 devices.deferredRemove = true 1684 } 1685 1686 if enableDeferredDeletion { 1687 if !devices.deferredRemove { 1688 return fmt.Errorf("devmapper: Deferred deletion can not be enabled as deferred removal is not enabled. Enable deferred removal using --storage-opt dm.use_deferred_removal=true parameter") 1689 } 1690 logrus.Debug("devmapper: Deferred deletion support enabled.") 1691 devices.deferredDelete = true 1692 } 1693 return nil 1694 } 1695 1696 func (devices *DeviceSet) initDevmapper(doInit bool) error { 1697 // give ourselves to libdm as a log handler 1698 devicemapper.LogInit(devices) 1699 1700 version, err := devicemapper.GetDriverVersion() 1701 if err != nil { 1702 // Can't even get driver version, assume not supported 1703 return graphdriver.ErrNotSupported 1704 } 1705 1706 if err := determineDriverCapabilities(version); err != nil { 1707 return graphdriver.ErrNotSupported 1708 } 1709 1710 if err := devices.enableDeferredRemovalDeletion(); err != nil { 1711 return err 1712 } 1713 1714 // https://github.com/docker/docker/issues/4036 1715 if supported := devicemapper.UdevSetSyncSupport(true); !supported { 1716 if dockerversion.IAmStatic == "true" { 1717 logrus.Errorf("devmapper: Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a dynamic binary to use devicemapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/daemon/#daemon-storage-driver-option") 1718 } else { 1719 logrus.Errorf("devmapper: Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a more recent version of libdevmapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/daemon/#daemon-storage-driver-option") 1720 } 1721 1722 if !devices.overrideUdevSyncCheck { 1723 return graphdriver.ErrNotSupported 1724 } 1725 } 1726 1727 //create the root dir of the devmapper driver ownership to match this 1728 //daemon's remapped root uid/gid so containers can start properly 1729 uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps) 1730 if err != nil { 1731 return err 1732 } 1733 if err := idtools.MkdirAs(devices.root, 0700, uid, gid); err != nil && !os.IsExist(err) { 1734 return err 1735 } 1736 if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil && !os.IsExist(err) { 1737 return err 1738 } 1739 1740 // Set the device prefix from the device id and inode of the docker root dir 1741 1742 st, err := os.Stat(devices.root) 1743 if err != nil { 1744 return fmt.Errorf("devmapper: Error looking up dir %s: %s", devices.root, err) 1745 } 1746 sysSt := st.Sys().(*syscall.Stat_t) 1747 // "reg-" stands for "regular file". 1748 // In the future we might use "dev-" for "device file", etc. 1749 // docker-maj,min[-inode] stands for: 1750 // - Managed by docker 1751 // - The target of this device is at major <maj> and minor <min> 1752 // - If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself. 1753 devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino) 1754 logrus.Debugf("devmapper: Generated prefix: %s", devices.devicePrefix) 1755 1756 // Check for the existence of the thin-pool device 1757 poolExists, err := devices.thinPoolExists(devices.getPoolName()) 1758 if err != nil { 1759 return err 1760 } 1761 1762 // It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files 1763 // that are not Close-on-exec, 1764 // so we add this badhack to make sure it closes itself 1765 setCloseOnExec("/dev/mapper/control") 1766 1767 // Make sure the sparse images exist in <root>/devicemapper/data and 1768 // <root>/devicemapper/metadata 1769 1770 createdLoopback := false 1771 1772 // If the pool doesn't exist, create it 1773 if !poolExists && devices.thinPoolDevice == "" { 1774 logrus.Debug("devmapper: Pool doesn't exist. Creating it.") 1775 1776 var ( 1777 dataFile *os.File 1778 metadataFile *os.File 1779 ) 1780 1781 if devices.dataDevice == "" { 1782 // Make sure the sparse images exist in <root>/devicemapper/data 1783 1784 hasData := devices.hasImage("data") 1785 1786 if !doInit && !hasData { 1787 return errors.New("Loopback data file not found") 1788 } 1789 1790 if !hasData { 1791 createdLoopback = true 1792 } 1793 1794 data, err := devices.ensureImage("data", devices.dataLoopbackSize) 1795 if err != nil { 1796 logrus.Debugf("devmapper: Error device ensureImage (data): %s", err) 1797 return err 1798 } 1799 1800 dataFile, err = loopback.AttachLoopDevice(data) 1801 if err != nil { 1802 return err 1803 } 1804 devices.dataLoopFile = data 1805 devices.dataDevice = dataFile.Name() 1806 } else { 1807 dataFile, err = os.OpenFile(devices.dataDevice, os.O_RDWR, 0600) 1808 if err != nil { 1809 return err 1810 } 1811 } 1812 defer dataFile.Close() 1813 1814 if devices.metadataDevice == "" { 1815 // Make sure the sparse images exist in <root>/devicemapper/metadata 1816 1817 hasMetadata := devices.hasImage("metadata") 1818 1819 if !doInit && !hasMetadata { 1820 return errors.New("Loopback metadata file not found") 1821 } 1822 1823 if !hasMetadata { 1824 createdLoopback = true 1825 } 1826 1827 metadata, err := devices.ensureImage("metadata", devices.metaDataLoopbackSize) 1828 if err != nil { 1829 logrus.Debugf("devmapper: Error device ensureImage (metadata): %s", err) 1830 return err 1831 } 1832 1833 metadataFile, err = loopback.AttachLoopDevice(metadata) 1834 if err != nil { 1835 return err 1836 } 1837 devices.metadataLoopFile = metadata 1838 devices.metadataDevice = metadataFile.Name() 1839 } else { 1840 metadataFile, err = os.OpenFile(devices.metadataDevice, os.O_RDWR, 0600) 1841 if err != nil { 1842 return err 1843 } 1844 } 1845 defer metadataFile.Close() 1846 1847 if err := devicemapper.CreatePool(devices.getPoolName(), dataFile, metadataFile, devices.thinpBlockSize); err != nil { 1848 return err 1849 } 1850 } 1851 1852 // Pool already exists and caller did not pass us a pool. That means 1853 // we probably created pool earlier and could not remove it as some 1854 // containers were still using it. Detect some of the properties of 1855 // pool, like is it using loop devices. 1856 if poolExists && devices.thinPoolDevice == "" { 1857 if err := devices.loadThinPoolLoopBackInfo(); err != nil { 1858 logrus.Debugf("devmapper: Failed to load thin pool loopback device information:%v", err) 1859 return err 1860 } 1861 } 1862 1863 // If we didn't just create the data or metadata image, we need to 1864 // load the transaction id and migrate old metadata 1865 if !createdLoopback { 1866 if err := devices.initMetaData(); err != nil { 1867 return err 1868 } 1869 } 1870 1871 if devices.thinPoolDevice == "" { 1872 if devices.metadataLoopFile != "" || devices.dataLoopFile != "" { 1873 logrus.Warn("devmapper: Usage of loopback devices is strongly discouraged for production use. Please use `--storage-opt dm.thinpooldev` or use `man docker` to refer to dm.thinpooldev section.") 1874 } 1875 } 1876 1877 // Right now this loads only NextDeviceID. If there is more metadata 1878 // down the line, we might have to move it earlier. 1879 if err := devices.loadDeviceSetMetaData(); err != nil { 1880 return err 1881 } 1882 1883 // Setup the base image 1884 if doInit { 1885 if err := devices.setupBaseImage(); err != nil { 1886 logrus.Debugf("devmapper: Error device setupBaseImage: %s", err) 1887 return err 1888 } 1889 } 1890 1891 return nil 1892 } 1893 1894 // AddDevice adds a device and registers in the hash. 1895 func (devices *DeviceSet) AddDevice(hash, baseHash string, storageOpt map[string]string) error { 1896 logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s)", hash, baseHash) 1897 defer logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s) END", hash, baseHash) 1898 1899 // If a deleted device exists, return error. 1900 baseInfo, err := devices.lookupDeviceWithLock(baseHash) 1901 if err != nil { 1902 return err 1903 } 1904 1905 if baseInfo.Deleted { 1906 return fmt.Errorf("devmapper: Base device %v has been marked for deferred deletion", baseInfo.Hash) 1907 } 1908 1909 baseInfo.lock.Lock() 1910 defer baseInfo.lock.Unlock() 1911 1912 devices.Lock() 1913 defer devices.Unlock() 1914 1915 // Also include deleted devices in case hash of new device is 1916 // same as one of the deleted devices. 1917 if info, _ := devices.lookupDevice(hash); info != nil { 1918 return fmt.Errorf("devmapper: device %s already exists. Deleted=%v", hash, info.Deleted) 1919 } 1920 1921 size, err := devices.parseStorageOpt(storageOpt) 1922 if err != nil { 1923 return err 1924 } 1925 1926 if size == 0 { 1927 size = baseInfo.Size 1928 } 1929 1930 if size < baseInfo.Size { 1931 return fmt.Errorf("devmapper: Container size cannot be smaller than %s", units.HumanSize(float64(baseInfo.Size))) 1932 } 1933 1934 if err := devices.takeSnapshot(hash, baseInfo, size); err != nil { 1935 return err 1936 } 1937 1938 // Grow the container rootfs. 1939 if size > baseInfo.Size { 1940 info, err := devices.lookupDevice(hash) 1941 if err != nil { 1942 return err 1943 } 1944 1945 if err := devices.growFS(info); err != nil { 1946 return err 1947 } 1948 } 1949 1950 return nil 1951 } 1952 1953 func (devices *DeviceSet) parseStorageOpt(storageOpt map[string]string) (uint64, error) { 1954 1955 // Read size to change the block device size per container. 1956 for key, val := range storageOpt { 1957 key := strings.ToLower(key) 1958 switch key { 1959 case "size": 1960 size, err := units.RAMInBytes(val) 1961 if err != nil { 1962 return 0, err 1963 } 1964 return uint64(size), nil 1965 default: 1966 return 0, fmt.Errorf("Unknown option %s", key) 1967 } 1968 } 1969 1970 return 0, nil 1971 } 1972 1973 func (devices *DeviceSet) markForDeferredDeletion(info *devInfo) error { 1974 // If device is already in deleted state, there is nothing to be done. 1975 if info.Deleted { 1976 return nil 1977 } 1978 1979 logrus.Debugf("devmapper: Marking device %s for deferred deletion.", info.Hash) 1980 1981 info.Deleted = true 1982 1983 // save device metadata to reflect deleted state. 1984 if err := devices.saveMetadata(info); err != nil { 1985 info.Deleted = false 1986 return err 1987 } 1988 1989 devices.nrDeletedDevices++ 1990 return nil 1991 } 1992 1993 // Should be called with devices.Lock() held. 1994 func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) error { 1995 if err := devices.openTransaction(info.Hash, info.DeviceID); err != nil { 1996 logrus.Debugf("devmapper: Error opening transaction hash = %s deviceId = %d", "", info.DeviceID) 1997 return err 1998 } 1999 2000 defer devices.closeTransaction() 2001 2002 err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID) 2003 if err != nil { 2004 // If syncDelete is true, we want to return error. If deferred 2005 // deletion is not enabled, we return an error. If error is 2006 // something other then EBUSY, return an error. 2007 if syncDelete || !devices.deferredDelete || err != devicemapper.ErrBusy { 2008 logrus.Debugf("devmapper: Error deleting device: %s", err) 2009 return err 2010 } 2011 } 2012 2013 if err == nil { 2014 if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil { 2015 return err 2016 } 2017 // If device was already in deferred delete state that means 2018 // deletion was being tried again later. Reduce the deleted 2019 // device count. 2020 if info.Deleted { 2021 devices.nrDeletedDevices-- 2022 } 2023 devices.markDeviceIDFree(info.DeviceID) 2024 } else { 2025 if err := devices.markForDeferredDeletion(info); err != nil { 2026 return err 2027 } 2028 } 2029 2030 return nil 2031 } 2032 2033 // Issue discard only if device open count is zero. 2034 func (devices *DeviceSet) issueDiscard(info *devInfo) error { 2035 logrus.Debugf("devmapper: issueDiscard(device: %s). START", info.Hash) 2036 defer logrus.Debugf("devmapper: issueDiscard(device: %s). END", info.Hash) 2037 // This is a workaround for the kernel not discarding block so 2038 // on the thin pool when we remove a thinp device, so we do it 2039 // manually. 2040 // Even if device is deferred deleted, activate it and issue 2041 // discards. 2042 if err := devices.activateDeviceIfNeeded(info, true); err != nil { 2043 return err 2044 } 2045 2046 devinfo, err := devicemapper.GetInfo(info.Name()) 2047 if err != nil { 2048 return err 2049 } 2050 2051 if devinfo.OpenCount != 0 { 2052 logrus.Debugf("devmapper: Device: %s is in use. OpenCount=%d. Not issuing discards.", info.Hash, devinfo.OpenCount) 2053 return nil 2054 } 2055 2056 if err := devicemapper.BlockDeviceDiscard(info.DevName()); err != nil { 2057 logrus.Debugf("devmapper: Error discarding block on device: %s (ignoring)", err) 2058 } 2059 return nil 2060 } 2061 2062 // Should be called with devices.Lock() held. 2063 func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error { 2064 if devices.doBlkDiscard { 2065 devices.issueDiscard(info) 2066 } 2067 2068 // Try to deactivate device in case it is active. 2069 if err := devices.deactivateDevice(info); err != nil { 2070 logrus.Debugf("devmapper: Error deactivating device: %s", err) 2071 return err 2072 } 2073 2074 if err := devices.deleteTransaction(info, syncDelete); err != nil { 2075 return err 2076 } 2077 2078 return nil 2079 } 2080 2081 // DeleteDevice will return success if device has been marked for deferred 2082 // removal. If one wants to override that and want DeleteDevice() to fail if 2083 // device was busy and could not be deleted, set syncDelete=true. 2084 func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error { 2085 logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) START", hash, syncDelete) 2086 defer logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) END", hash, syncDelete) 2087 info, err := devices.lookupDeviceWithLock(hash) 2088 if err != nil { 2089 return err 2090 } 2091 2092 info.lock.Lock() 2093 defer info.lock.Unlock() 2094 2095 devices.Lock() 2096 defer devices.Unlock() 2097 2098 return devices.deleteDevice(info, syncDelete) 2099 } 2100 2101 func (devices *DeviceSet) deactivatePool() error { 2102 logrus.Debug("devmapper: deactivatePool()") 2103 defer logrus.Debug("devmapper: deactivatePool END") 2104 devname := devices.getPoolDevName() 2105 2106 devinfo, err := devicemapper.GetInfo(devname) 2107 if err != nil { 2108 return err 2109 } 2110 2111 if devinfo.Exists == 0 { 2112 return nil 2113 } 2114 if err := devicemapper.RemoveDevice(devname); err != nil { 2115 return err 2116 } 2117 2118 if d, err := devicemapper.GetDeps(devname); err == nil { 2119 logrus.Warnf("devmapper: device %s still has %d active dependents", devname, d.Count) 2120 } 2121 2122 return nil 2123 } 2124 2125 func (devices *DeviceSet) deactivateDevice(info *devInfo) error { 2126 logrus.Debugf("devmapper: deactivateDevice(%s)", info.Hash) 2127 defer logrus.Debugf("devmapper: deactivateDevice END(%s)", info.Hash) 2128 2129 devinfo, err := devicemapper.GetInfo(info.Name()) 2130 if err != nil { 2131 return err 2132 } 2133 2134 if devinfo.Exists == 0 { 2135 return nil 2136 } 2137 2138 if devices.deferredRemove { 2139 if err := devicemapper.RemoveDeviceDeferred(info.Name()); err != nil { 2140 return err 2141 } 2142 } else { 2143 if err := devices.removeDevice(info.Name()); err != nil { 2144 return err 2145 } 2146 } 2147 return nil 2148 } 2149 2150 // Issues the underlying dm remove operation. 2151 func (devices *DeviceSet) removeDevice(devname string) error { 2152 var err error 2153 2154 logrus.Debugf("devmapper: removeDevice START(%s)", devname) 2155 defer logrus.Debugf("devmapper: removeDevice END(%s)", devname) 2156 2157 for i := 0; i < 200; i++ { 2158 err = devicemapper.RemoveDevice(devname) 2159 if err == nil { 2160 break 2161 } 2162 if err != devicemapper.ErrBusy { 2163 return err 2164 } 2165 2166 // If we see EBUSY it may be a transient error, 2167 // sleep a bit a retry a few times. 2168 devices.Unlock() 2169 time.Sleep(100 * time.Millisecond) 2170 devices.Lock() 2171 } 2172 2173 return err 2174 } 2175 2176 func (devices *DeviceSet) cancelDeferredRemovalIfNeeded(info *devInfo) error { 2177 if !devices.deferredRemove { 2178 return nil 2179 } 2180 2181 logrus.Debugf("devmapper: cancelDeferredRemovalIfNeeded START(%s)", info.Name()) 2182 defer logrus.Debugf("devmapper: cancelDeferredRemovalIfNeeded END(%s)", info.Name()) 2183 2184 devinfo, err := devicemapper.GetInfoWithDeferred(info.Name()) 2185 if err != nil { 2186 return err 2187 } 2188 2189 if devinfo != nil && devinfo.DeferredRemove == 0 { 2190 return nil 2191 } 2192 2193 // Cancel deferred remove 2194 if err := devices.cancelDeferredRemoval(info); err != nil { 2195 // If Error is ErrEnxio. Device is probably already gone. Continue. 2196 if err != devicemapper.ErrEnxio { 2197 return err 2198 } 2199 } 2200 return nil 2201 } 2202 2203 func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error { 2204 logrus.Debugf("devmapper: cancelDeferredRemoval START(%s)", info.Name()) 2205 defer logrus.Debugf("devmapper: cancelDeferredRemoval END(%s)", info.Name()) 2206 2207 var err error 2208 2209 // Cancel deferred remove 2210 for i := 0; i < 100; i++ { 2211 err = devicemapper.CancelDeferredRemove(info.Name()) 2212 if err != nil { 2213 if err == devicemapper.ErrBusy { 2214 // If we see EBUSY it may be a transient error, 2215 // sleep a bit a retry a few times. 2216 devices.Unlock() 2217 time.Sleep(100 * time.Millisecond) 2218 devices.Lock() 2219 continue 2220 } 2221 } 2222 break 2223 } 2224 return err 2225 } 2226 2227 // Shutdown shuts down the device by unmounting the root. 2228 func (devices *DeviceSet) Shutdown(home string) error { 2229 logrus.Debugf("devmapper: [deviceset %s] Shutdown()", devices.devicePrefix) 2230 logrus.Debugf("devmapper: Shutting down DeviceSet: %s", devices.root) 2231 defer logrus.Debugf("devmapper: [deviceset %s] Shutdown() END", devices.devicePrefix) 2232 2233 // Stop deletion worker. This should start delivering new events to 2234 // ticker channel. That means no new instance of cleanupDeletedDevice() 2235 // will run after this call. If one instance is already running at 2236 // the time of the call, it must be holding devices.Lock() and 2237 // we will block on this lock till cleanup function exits. 2238 devices.deletionWorkerTicker.Stop() 2239 2240 devices.Lock() 2241 // Save DeviceSet Metadata first. Docker kills all threads if they 2242 // don't finish in certain time. It is possible that Shutdown() 2243 // routine does not finish in time as we loop trying to deactivate 2244 // some devices while these are busy. In that case shutdown() routine 2245 // will be killed and we will not get a chance to save deviceset 2246 // metadata. Hence save this early before trying to deactivate devices. 2247 devices.saveDeviceSetMetaData() 2248 2249 // ignore the error since it's just a best effort to not try to unmount something that's mounted 2250 mounts, _ := mount.GetMounts() 2251 mounted := make(map[string]bool, len(mounts)) 2252 for _, mnt := range mounts { 2253 mounted[mnt.Mountpoint] = true 2254 } 2255 2256 if err := filepath.Walk(path.Join(home, "mnt"), func(p string, info os.FileInfo, err error) error { 2257 if err != nil { 2258 return err 2259 } 2260 if !info.IsDir() { 2261 return nil 2262 } 2263 2264 if mounted[p] { 2265 // We use MNT_DETACH here in case it is still busy in some running 2266 // container. This means it'll go away from the global scope directly, 2267 // and the device will be released when that container dies. 2268 if err := syscall.Unmount(p, syscall.MNT_DETACH); err != nil { 2269 logrus.Debugf("devmapper: Shutdown unmounting %s, error: %s", p, err) 2270 } 2271 } 2272 2273 if devInfo, err := devices.lookupDevice(path.Base(p)); err != nil { 2274 logrus.Debugf("devmapper: Shutdown lookup device %s, error: %s", path.Base(p), err) 2275 } else { 2276 if err := devices.deactivateDevice(devInfo); err != nil { 2277 logrus.Debugf("devmapper: Shutdown deactivate %s , error: %s", devInfo.Hash, err) 2278 } 2279 } 2280 2281 return nil 2282 }); err != nil && !os.IsNotExist(err) { 2283 devices.Unlock() 2284 return err 2285 } 2286 2287 devices.Unlock() 2288 2289 info, _ := devices.lookupDeviceWithLock("") 2290 if info != nil { 2291 info.lock.Lock() 2292 devices.Lock() 2293 if err := devices.deactivateDevice(info); err != nil { 2294 logrus.Debugf("devmapper: Shutdown deactivate base , error: %s", err) 2295 } 2296 devices.Unlock() 2297 info.lock.Unlock() 2298 } 2299 2300 devices.Lock() 2301 if devices.thinPoolDevice == "" { 2302 if err := devices.deactivatePool(); err != nil { 2303 logrus.Debugf("devmapper: Shutdown deactivate pool , error: %s", err) 2304 } 2305 } 2306 devices.Unlock() 2307 2308 return nil 2309 } 2310 2311 // MountDevice mounts the device if not already mounted. 2312 func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error { 2313 info, err := devices.lookupDeviceWithLock(hash) 2314 if err != nil { 2315 return err 2316 } 2317 2318 if info.Deleted { 2319 return fmt.Errorf("devmapper: Can't mount device %v as it has been marked for deferred deletion", info.Hash) 2320 } 2321 2322 info.lock.Lock() 2323 defer info.lock.Unlock() 2324 2325 devices.Lock() 2326 defer devices.Unlock() 2327 2328 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2329 return fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2330 } 2331 2332 fstype, err := ProbeFsType(info.DevName()) 2333 if err != nil { 2334 return err 2335 } 2336 2337 options := "" 2338 2339 if fstype == "xfs" { 2340 // XFS needs nouuid or it can't mount filesystems with the same fs 2341 options = joinMountOptions(options, "nouuid") 2342 } 2343 2344 options = joinMountOptions(options, devices.mountOptions) 2345 options = joinMountOptions(options, label.FormatMountLabel("", mountLabel)) 2346 2347 if err := mount.Mount(info.DevName(), path, fstype, options); err != nil { 2348 return fmt.Errorf("devmapper: Error mounting '%s' on '%s': %s", info.DevName(), path, err) 2349 } 2350 2351 return nil 2352 } 2353 2354 // UnmountDevice unmounts the device and removes it from hash. 2355 func (devices *DeviceSet) UnmountDevice(hash, mountPath string) error { 2356 logrus.Debugf("devmapper: UnmountDevice(hash=%s)", hash) 2357 defer logrus.Debugf("devmapper: UnmountDevice(hash=%s) END", hash) 2358 2359 info, err := devices.lookupDeviceWithLock(hash) 2360 if err != nil { 2361 return err 2362 } 2363 2364 info.lock.Lock() 2365 defer info.lock.Unlock() 2366 2367 devices.Lock() 2368 defer devices.Unlock() 2369 2370 logrus.Debugf("devmapper: Unmount(%s)", mountPath) 2371 if err := syscall.Unmount(mountPath, syscall.MNT_DETACH); err != nil { 2372 return err 2373 } 2374 logrus.Debug("devmapper: Unmount done") 2375 2376 if err := devices.deactivateDevice(info); err != nil { 2377 return err 2378 } 2379 2380 return nil 2381 } 2382 2383 // HasDevice returns true if the device metadata exists. 2384 func (devices *DeviceSet) HasDevice(hash string) bool { 2385 info, _ := devices.lookupDeviceWithLock(hash) 2386 return info != nil 2387 } 2388 2389 // List returns a list of device ids. 2390 func (devices *DeviceSet) List() []string { 2391 devices.Lock() 2392 defer devices.Unlock() 2393 2394 ids := make([]string, len(devices.Devices)) 2395 i := 0 2396 for k := range devices.Devices { 2397 ids[i] = k 2398 i++ 2399 } 2400 return ids 2401 } 2402 2403 func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) { 2404 var params string 2405 _, sizeInSectors, _, params, err = devicemapper.GetStatus(devName) 2406 if err != nil { 2407 return 2408 } 2409 if _, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector); err == nil { 2410 return 2411 } 2412 return 2413 } 2414 2415 // GetDeviceStatus provides size, mapped sectors 2416 func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) { 2417 info, err := devices.lookupDeviceWithLock(hash) 2418 if err != nil { 2419 return nil, err 2420 } 2421 2422 info.lock.Lock() 2423 defer info.lock.Unlock() 2424 2425 devices.Lock() 2426 defer devices.Unlock() 2427 2428 status := &DevStatus{ 2429 DeviceID: info.DeviceID, 2430 Size: info.Size, 2431 TransactionID: info.TransactionID, 2432 } 2433 2434 if err := devices.activateDeviceIfNeeded(info, false); err != nil { 2435 return nil, fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err) 2436 } 2437 2438 sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName()) 2439 2440 if err != nil { 2441 return nil, err 2442 } 2443 2444 status.SizeInSectors = sizeInSectors 2445 status.MappedSectors = mappedSectors 2446 status.HighestMappedSector = highestMappedSector 2447 2448 return status, nil 2449 } 2450 2451 func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionID, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) { 2452 var params string 2453 if _, totalSizeInSectors, _, params, err = devicemapper.GetStatus(devices.getPoolName()); err == nil { 2454 _, err = fmt.Sscanf(params, "%d %d/%d %d/%d", &transactionID, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal) 2455 } 2456 return 2457 } 2458 2459 // DataDevicePath returns the path to the data storage for this deviceset, 2460 // regardless of loopback or block device 2461 func (devices *DeviceSet) DataDevicePath() string { 2462 return devices.dataDevice 2463 } 2464 2465 // MetadataDevicePath returns the path to the metadata storage for this deviceset, 2466 // regardless of loopback or block device 2467 func (devices *DeviceSet) MetadataDevicePath() string { 2468 return devices.metadataDevice 2469 } 2470 2471 func (devices *DeviceSet) getUnderlyingAvailableSpace(loopFile string) (uint64, error) { 2472 buf := new(syscall.Statfs_t) 2473 if err := syscall.Statfs(loopFile, buf); err != nil { 2474 logrus.Warnf("devmapper: Couldn't stat loopfile filesystem %v: %v", loopFile, err) 2475 return 0, err 2476 } 2477 return buf.Bfree * uint64(buf.Bsize), nil 2478 } 2479 2480 func (devices *DeviceSet) isRealFile(loopFile string) (bool, error) { 2481 if loopFile != "" { 2482 fi, err := os.Stat(loopFile) 2483 if err != nil { 2484 logrus.Warnf("devmapper: Couldn't stat loopfile %v: %v", loopFile, err) 2485 return false, err 2486 } 2487 return fi.Mode().IsRegular(), nil 2488 } 2489 return false, nil 2490 } 2491 2492 // Status returns the current status of this deviceset 2493 func (devices *DeviceSet) Status() *Status { 2494 devices.Lock() 2495 defer devices.Unlock() 2496 2497 status := &Status{} 2498 2499 status.PoolName = devices.getPoolName() 2500 status.DataFile = devices.DataDevicePath() 2501 status.DataLoopback = devices.dataLoopFile 2502 status.MetadataFile = devices.MetadataDevicePath() 2503 status.MetadataLoopback = devices.metadataLoopFile 2504 status.UdevSyncSupported = devicemapper.UdevSyncSupported() 2505 status.DeferredRemoveEnabled = devices.deferredRemove 2506 status.DeferredDeleteEnabled = devices.deferredDelete 2507 status.DeferredDeletedDeviceCount = devices.nrDeletedDevices 2508 status.BaseDeviceSize = devices.getBaseDeviceSize() 2509 status.BaseDeviceFS = devices.getBaseDeviceFS() 2510 2511 totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 2512 if err == nil { 2513 // Convert from blocks to bytes 2514 blockSizeInSectors := totalSizeInSectors / dataTotal 2515 2516 status.Data.Used = dataUsed * blockSizeInSectors * 512 2517 status.Data.Total = dataTotal * blockSizeInSectors * 512 2518 status.Data.Available = status.Data.Total - status.Data.Used 2519 2520 // metadata blocks are always 4k 2521 status.Metadata.Used = metadataUsed * 4096 2522 status.Metadata.Total = metadataTotal * 4096 2523 status.Metadata.Available = status.Metadata.Total - status.Metadata.Used 2524 2525 status.SectorSize = blockSizeInSectors * 512 2526 2527 if check, _ := devices.isRealFile(devices.dataLoopFile); check { 2528 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.dataLoopFile) 2529 if err == nil && actualSpace < status.Data.Available { 2530 status.Data.Available = actualSpace 2531 } 2532 } 2533 2534 if check, _ := devices.isRealFile(devices.metadataLoopFile); check { 2535 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.metadataLoopFile) 2536 if err == nil && actualSpace < status.Metadata.Available { 2537 status.Metadata.Available = actualSpace 2538 } 2539 } 2540 2541 minFreeData := (dataTotal * uint64(devices.minFreeSpacePercent)) / 100 2542 status.MinFreeSpace = minFreeData * blockSizeInSectors * 512 2543 } 2544 2545 return status 2546 } 2547 2548 // Status returns the current status of this deviceset 2549 func (devices *DeviceSet) exportDeviceMetadata(hash string) (*deviceMetadata, error) { 2550 info, err := devices.lookupDeviceWithLock(hash) 2551 if err != nil { 2552 return nil, err 2553 } 2554 2555 info.lock.Lock() 2556 defer info.lock.Unlock() 2557 2558 metadata := &deviceMetadata{info.DeviceID, info.Size, info.Name()} 2559 return metadata, nil 2560 } 2561 2562 // NewDeviceSet creates the device set based on the options provided. 2563 func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps []idtools.IDMap) (*DeviceSet, error) { 2564 devicemapper.SetDevDir("/dev") 2565 2566 devices := &DeviceSet{ 2567 root: root, 2568 metaData: metaData{Devices: make(map[string]*devInfo)}, 2569 dataLoopbackSize: defaultDataLoopbackSize, 2570 metaDataLoopbackSize: defaultMetaDataLoopbackSize, 2571 baseFsSize: defaultBaseFsSize, 2572 overrideUdevSyncCheck: defaultUdevSyncOverride, 2573 doBlkDiscard: true, 2574 thinpBlockSize: defaultThinpBlockSize, 2575 deviceIDMap: make([]byte, deviceIDMapSz), 2576 deletionWorkerTicker: time.NewTicker(time.Second * 30), 2577 uidMaps: uidMaps, 2578 gidMaps: gidMaps, 2579 minFreeSpacePercent: defaultMinFreeSpacePercent, 2580 } 2581 2582 foundBlkDiscard := false 2583 for _, option := range options { 2584 key, val, err := parsers.ParseKeyValueOpt(option) 2585 if err != nil { 2586 return nil, err 2587 } 2588 key = strings.ToLower(key) 2589 switch key { 2590 case "dm.basesize": 2591 size, err := units.RAMInBytes(val) 2592 if err != nil { 2593 return nil, err 2594 } 2595 userBaseSize = true 2596 devices.baseFsSize = uint64(size) 2597 case "dm.loopdatasize": 2598 size, err := units.RAMInBytes(val) 2599 if err != nil { 2600 return nil, err 2601 } 2602 devices.dataLoopbackSize = size 2603 case "dm.loopmetadatasize": 2604 size, err := units.RAMInBytes(val) 2605 if err != nil { 2606 return nil, err 2607 } 2608 devices.metaDataLoopbackSize = size 2609 case "dm.fs": 2610 if val != "ext4" && val != "xfs" { 2611 return nil, fmt.Errorf("devmapper: Unsupported filesystem %s\n", val) 2612 } 2613 devices.filesystem = val 2614 case "dm.mkfsarg": 2615 devices.mkfsArgs = append(devices.mkfsArgs, val) 2616 case "dm.mountopt": 2617 devices.mountOptions = joinMountOptions(devices.mountOptions, val) 2618 case "dm.metadatadev": 2619 devices.metadataDevice = val 2620 case "dm.datadev": 2621 devices.dataDevice = val 2622 case "dm.thinpooldev": 2623 devices.thinPoolDevice = strings.TrimPrefix(val, "/dev/mapper/") 2624 case "dm.blkdiscard": 2625 foundBlkDiscard = true 2626 devices.doBlkDiscard, err = strconv.ParseBool(val) 2627 if err != nil { 2628 return nil, err 2629 } 2630 case "dm.blocksize": 2631 size, err := units.RAMInBytes(val) 2632 if err != nil { 2633 return nil, err 2634 } 2635 // convert to 512b sectors 2636 devices.thinpBlockSize = uint32(size) >> 9 2637 case "dm.override_udev_sync_check": 2638 devices.overrideUdevSyncCheck, err = strconv.ParseBool(val) 2639 if err != nil { 2640 return nil, err 2641 } 2642 2643 case "dm.use_deferred_removal": 2644 enableDeferredRemoval, err = strconv.ParseBool(val) 2645 if err != nil { 2646 return nil, err 2647 } 2648 2649 case "dm.use_deferred_deletion": 2650 enableDeferredDeletion, err = strconv.ParseBool(val) 2651 if err != nil { 2652 return nil, err 2653 } 2654 2655 case "dm.min_free_space": 2656 if !strings.HasSuffix(val, "%") { 2657 return nil, fmt.Errorf("devmapper: Option dm.min_free_space requires %% suffix") 2658 } 2659 2660 valstring := strings.TrimSuffix(val, "%") 2661 minFreeSpacePercent, err := strconv.ParseUint(valstring, 10, 32) 2662 if err != nil { 2663 return nil, err 2664 } 2665 2666 if minFreeSpacePercent >= 100 { 2667 return nil, fmt.Errorf("devmapper: Invalid value %v for option dm.min_free_space", val) 2668 } 2669 2670 devices.minFreeSpacePercent = uint32(minFreeSpacePercent) 2671 default: 2672 return nil, fmt.Errorf("devmapper: Unknown option %s\n", key) 2673 } 2674 } 2675 2676 // By default, don't do blk discard hack on raw devices, its rarely useful and is expensive 2677 if !foundBlkDiscard && (devices.dataDevice != "" || devices.thinPoolDevice != "") { 2678 devices.doBlkDiscard = false 2679 } 2680 2681 if err := devices.initDevmapper(doInit); err != nil { 2682 return nil, err 2683 } 2684 2685 return devices, nil 2686 }