github.com/mheon/docker@v0.11.2-0.20150922122814-44f47903a831/daemon/graphdriver/devmapper/deviceset.go (about) 1 // +build linux 2 3 package devmapper 4 5 import ( 6 "encoding/json" 7 "errors" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "os" 12 "os/exec" 13 "path" 14 "path/filepath" 15 "strconv" 16 "strings" 17 "sync" 18 "syscall" 19 "time" 20 21 "github.com/Sirupsen/logrus" 22 "github.com/docker/docker/daemon/graphdriver" 23 "github.com/docker/docker/pkg/devicemapper" 24 "github.com/docker/docker/pkg/parsers" 25 "github.com/docker/docker/pkg/units" 26 "github.com/opencontainers/runc/libcontainer/label" 27 ) 28 29 var ( 30 defaultDataLoopbackSize int64 = 100 * 1024 * 1024 * 1024 31 defaultMetaDataLoopbackSize int64 = 2 * 1024 * 1024 * 1024 32 defaultBaseFsSize uint64 = 100 * 1024 * 1024 * 1024 33 defaultThinpBlockSize uint32 = 128 // 64K = 128 512b sectors 34 defaultUdevSyncOverride = false 35 maxDeviceID = 0xffffff // 24 bit, pool limit 36 deviceIDMapSz = (maxDeviceID + 1) / 8 37 // We retry device removal so many a times that even error messages 38 // will fill up console during normal operation. So only log Fatal 39 // messages by default. 40 logLevel = devicemapper.LogLevelFatal 41 driverDeferredRemovalSupport = false 42 enableDeferredRemoval = false 43 ) 44 45 const deviceSetMetaFile string = "deviceset-metadata" 46 const transactionMetaFile string = "transaction-metadata" 47 48 type transaction struct { 49 OpenTransactionID uint64 `json:"open_transaction_id"` 50 DeviceIDHash string `json:"device_hash"` 51 DeviceID int `json:"device_id"` 52 } 53 54 type devInfo struct { 55 Hash string `json:"-"` 56 DeviceID int `json:"device_id"` 57 Size uint64 `json:"size"` 58 TransactionID uint64 `json:"transaction_id"` 59 Initialized bool `json:"initialized"` 60 devices *DeviceSet 61 62 mountCount int 63 mountPath string 64 65 // The global DeviceSet lock guarantees that we serialize all 66 // the calls to libdevmapper (which is not threadsafe), but we 67 // sometimes release that lock while sleeping. In that case 68 // this per-device lock is still held, protecting against 69 // other accesses to the device that we're doing the wait on. 70 // 71 // WARNING: In order to avoid AB-BA deadlocks when releasing 72 // the global lock while holding the per-device locks all 73 // device locks must be acquired *before* the device lock, and 74 // multiple device locks should be acquired parent before child. 75 lock sync.Mutex 76 } 77 78 type metaData struct { 79 Devices map[string]*devInfo `json:"Devices"` 80 devicesLock sync.Mutex // Protects all read/writes to Devices map 81 } 82 83 // DeviceSet holds information about list of devices 84 type DeviceSet struct { 85 metaData `json:"-"` 86 sync.Mutex `json:"-"` // Protects Devices map and serializes calls into libdevmapper 87 root string 88 devicePrefix string 89 TransactionID uint64 `json:"-"` 90 NextDeviceID int `json:"next_device_id"` 91 deviceIDMap []byte 92 93 // Options 94 dataLoopbackSize int64 95 metaDataLoopbackSize int64 96 baseFsSize uint64 97 filesystem string 98 mountOptions string 99 mkfsArgs []string 100 dataDevice string // block or loop dev 101 dataLoopFile string // loopback file, if used 102 metadataDevice string // block or loop dev 103 metadataLoopFile string // loopback file, if used 104 doBlkDiscard bool 105 thinpBlockSize uint32 106 thinPoolDevice string 107 transaction `json:"-"` 108 overrideUdevSyncCheck bool 109 deferredRemove bool // use deferred removal 110 BaseDeviceUUID string //save UUID of base device 111 } 112 113 // DiskUsage contains information about disk usage and is used when reporting Status of a device. 114 type DiskUsage struct { 115 // Used bytes on the disk. 116 Used uint64 117 // Total bytes on the disk. 118 Total uint64 119 // Available bytes on the disk. 120 Available uint64 121 } 122 123 // Status returns the information about the device. 124 type Status struct { 125 // PoolName is the name of the data pool. 126 PoolName string 127 // DataFile is the actual block device for data. 128 DataFile string 129 // DataLoopback loopback file, if used. 130 DataLoopback string 131 // MetadataFile is the actual block device for metadata. 132 MetadataFile string 133 // MetadataLoopback is the loopback file, if used. 134 MetadataLoopback string 135 // Data is the disk used for data. 136 Data DiskUsage 137 // Metadata is the disk used for meta data. 138 Metadata DiskUsage 139 // SectorSize size of the vector. 140 SectorSize uint64 141 // UdevSyncSupported is true if sync is supported. 142 UdevSyncSupported bool 143 // DeferredRemoveEnabled is true then the device is not unmounted. 144 DeferredRemoveEnabled bool 145 } 146 147 // Structure used to export image/container metadata in docker inspect. 148 type deviceMetadata struct { 149 deviceID int 150 deviceSize uint64 // size in bytes 151 deviceName string // Device name as used during activation 152 } 153 154 // DevStatus returns information about device mounted containing its id, size and sector information. 155 type DevStatus struct { 156 // DeviceID is the id of the device. 157 DeviceID int 158 // Size is the size of the filesystem. 159 Size uint64 160 // TransactionID is a unique integer per device set used to identify an operation on the file system, this number is incremental. 161 TransactionID uint64 162 // SizeInSectors indicates the size of the sectors allocated. 163 SizeInSectors uint64 164 // MappedSectors indicates number of mapped sectors. 165 MappedSectors uint64 166 // HighestMappedSector is the pointer to the highest mapped sector. 167 HighestMappedSector uint64 168 } 169 170 func getDevName(name string) string { 171 return "/dev/mapper/" + name 172 } 173 174 func (info *devInfo) Name() string { 175 hash := info.Hash 176 if hash == "" { 177 hash = "base" 178 } 179 return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash) 180 } 181 182 func (info *devInfo) DevName() string { 183 return getDevName(info.Name()) 184 } 185 186 func (devices *DeviceSet) loopbackDir() string { 187 return path.Join(devices.root, "devicemapper") 188 } 189 190 func (devices *DeviceSet) metadataDir() string { 191 return path.Join(devices.root, "metadata") 192 } 193 194 func (devices *DeviceSet) metadataFile(info *devInfo) string { 195 file := info.Hash 196 if file == "" { 197 file = "base" 198 } 199 return path.Join(devices.metadataDir(), file) 200 } 201 202 func (devices *DeviceSet) transactionMetaFile() string { 203 return path.Join(devices.metadataDir(), transactionMetaFile) 204 } 205 206 func (devices *DeviceSet) deviceSetMetaFile() string { 207 return path.Join(devices.metadataDir(), deviceSetMetaFile) 208 } 209 210 func (devices *DeviceSet) oldMetadataFile() string { 211 return path.Join(devices.loopbackDir(), "json") 212 } 213 214 func (devices *DeviceSet) getPoolName() string { 215 if devices.thinPoolDevice == "" { 216 return devices.devicePrefix + "-pool" 217 } 218 return devices.thinPoolDevice 219 } 220 221 func (devices *DeviceSet) getPoolDevName() string { 222 return getDevName(devices.getPoolName()) 223 } 224 225 func (devices *DeviceSet) hasImage(name string) bool { 226 dirname := devices.loopbackDir() 227 filename := path.Join(dirname, name) 228 229 _, err := os.Stat(filename) 230 return err == nil 231 } 232 233 // ensureImage creates a sparse file of <size> bytes at the path 234 // <root>/devicemapper/<name>. 235 // If the file already exists, it does nothing. 236 // Either way it returns the full path. 237 func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) { 238 dirname := devices.loopbackDir() 239 filename := path.Join(dirname, name) 240 241 if err := os.MkdirAll(dirname, 0700); err != nil { 242 return "", err 243 } 244 245 if _, err := os.Stat(filename); err != nil { 246 if !os.IsNotExist(err) { 247 return "", err 248 } 249 logrus.Debugf("Creating loopback file %s for device-manage use", filename) 250 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600) 251 if err != nil { 252 return "", err 253 } 254 defer file.Close() 255 256 if err := file.Truncate(size); err != nil { 257 return "", err 258 } 259 } 260 return filename, nil 261 } 262 263 func (devices *DeviceSet) allocateTransactionID() uint64 { 264 devices.OpenTransactionID = devices.TransactionID + 1 265 return devices.OpenTransactionID 266 } 267 268 func (devices *DeviceSet) updatePoolTransactionID() error { 269 if err := devicemapper.SetTransactionID(devices.getPoolDevName(), devices.TransactionID, devices.OpenTransactionID); err != nil { 270 return fmt.Errorf("Error setting devmapper transaction ID: %s", err) 271 } 272 devices.TransactionID = devices.OpenTransactionID 273 return nil 274 } 275 276 func (devices *DeviceSet) removeMetadata(info *devInfo) error { 277 if err := os.RemoveAll(devices.metadataFile(info)); err != nil { 278 return fmt.Errorf("Error removing metadata file %s: %s", devices.metadataFile(info), err) 279 } 280 return nil 281 } 282 283 // Given json data and file path, write it to disk 284 func (devices *DeviceSet) writeMetaFile(jsonData []byte, filePath string) error { 285 tmpFile, err := ioutil.TempFile(devices.metadataDir(), ".tmp") 286 if err != nil { 287 return fmt.Errorf("Error creating metadata file: %s", err) 288 } 289 290 n, err := tmpFile.Write(jsonData) 291 if err != nil { 292 return fmt.Errorf("Error writing metadata to %s: %s", tmpFile.Name(), err) 293 } 294 if n < len(jsonData) { 295 return io.ErrShortWrite 296 } 297 if err := tmpFile.Sync(); err != nil { 298 return fmt.Errorf("Error syncing metadata file %s: %s", tmpFile.Name(), err) 299 } 300 if err := tmpFile.Close(); err != nil { 301 return fmt.Errorf("Error closing metadata file %s: %s", tmpFile.Name(), err) 302 } 303 if err := os.Rename(tmpFile.Name(), filePath); err != nil { 304 return fmt.Errorf("Error committing metadata file %s: %s", tmpFile.Name(), err) 305 } 306 307 return nil 308 } 309 310 func (devices *DeviceSet) saveMetadata(info *devInfo) error { 311 jsonData, err := json.Marshal(info) 312 if err != nil { 313 return fmt.Errorf("Error encoding metadata to json: %s", err) 314 } 315 if err := devices.writeMetaFile(jsonData, devices.metadataFile(info)); err != nil { 316 return err 317 } 318 return nil 319 } 320 321 func (devices *DeviceSet) markDeviceIDUsed(deviceID int) { 322 var mask byte 323 i := deviceID % 8 324 mask = 1 << uint(i) 325 devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] | mask 326 } 327 328 func (devices *DeviceSet) markDeviceIDFree(deviceID int) { 329 var mask byte 330 i := deviceID % 8 331 mask = ^(1 << uint(i)) 332 devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] & mask 333 } 334 335 func (devices *DeviceSet) isDeviceIDFree(deviceID int) bool { 336 var mask byte 337 i := deviceID % 8 338 mask = (1 << uint(i)) 339 if (devices.deviceIDMap[deviceID/8] & mask) != 0 { 340 return false 341 } 342 return true 343 } 344 345 func (devices *DeviceSet) lookupDevice(hash string) (*devInfo, error) { 346 devices.devicesLock.Lock() 347 defer devices.devicesLock.Unlock() 348 info := devices.Devices[hash] 349 if info == nil { 350 info = devices.loadMetadata(hash) 351 if info == nil { 352 return nil, fmt.Errorf("Unknown device %s", hash) 353 } 354 355 devices.Devices[hash] = info 356 } 357 return info, nil 358 } 359 360 func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo) error { 361 362 // Skip some of the meta files which are not device files. 363 if strings.HasSuffix(finfo.Name(), ".migrated") { 364 logrus.Debugf("Skipping file %s", path) 365 return nil 366 } 367 368 if strings.HasPrefix(finfo.Name(), ".") { 369 logrus.Debugf("Skipping file %s", path) 370 return nil 371 } 372 373 if finfo.Name() == deviceSetMetaFile { 374 logrus.Debugf("Skipping file %s", path) 375 return nil 376 } 377 378 logrus.Debugf("Loading data for file %s", path) 379 380 hash := finfo.Name() 381 if hash == "base" { 382 hash = "" 383 } 384 385 dinfo := devices.loadMetadata(hash) 386 if dinfo == nil { 387 return fmt.Errorf("Error loading device metadata file %s", hash) 388 } 389 390 if dinfo.DeviceID > maxDeviceID { 391 logrus.Errorf("Ignoring Invalid DeviceID=%d", dinfo.DeviceID) 392 return nil 393 } 394 395 devices.Lock() 396 devices.markDeviceIDUsed(dinfo.DeviceID) 397 devices.Unlock() 398 399 logrus.Debugf("Added deviceID=%d to DeviceIDMap", dinfo.DeviceID) 400 return nil 401 } 402 403 func (devices *DeviceSet) constructDeviceIDMap() error { 404 logrus.Debugf("[deviceset] constructDeviceIDMap()") 405 defer logrus.Debugf("[deviceset] constructDeviceIDMap() END") 406 407 var scan = func(path string, info os.FileInfo, err error) error { 408 if err != nil { 409 logrus.Debugf("Can't walk the file %s", path) 410 return nil 411 } 412 413 // Skip any directories 414 if info.IsDir() { 415 return nil 416 } 417 418 return devices.deviceFileWalkFunction(path, info) 419 } 420 421 return filepath.Walk(devices.metadataDir(), scan) 422 } 423 424 func (devices *DeviceSet) unregisterDevice(id int, hash string) error { 425 logrus.Debugf("unregisterDevice(%v, %v)", id, hash) 426 info := &devInfo{ 427 Hash: hash, 428 DeviceID: id, 429 } 430 431 devices.devicesLock.Lock() 432 delete(devices.Devices, hash) 433 devices.devicesLock.Unlock() 434 435 if err := devices.removeMetadata(info); err != nil { 436 logrus.Debugf("Error removing metadata: %s", err) 437 return err 438 } 439 440 return nil 441 } 442 443 func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, transactionID uint64) (*devInfo, error) { 444 logrus.Debugf("registerDevice(%v, %v)", id, hash) 445 info := &devInfo{ 446 Hash: hash, 447 DeviceID: id, 448 Size: size, 449 TransactionID: transactionID, 450 Initialized: false, 451 devices: devices, 452 } 453 454 devices.devicesLock.Lock() 455 devices.Devices[hash] = info 456 devices.devicesLock.Unlock() 457 458 if err := devices.saveMetadata(info); err != nil { 459 // Try to remove unused device 460 devices.devicesLock.Lock() 461 delete(devices.Devices, hash) 462 devices.devicesLock.Unlock() 463 return nil, err 464 } 465 466 return info, nil 467 } 468 469 func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo) error { 470 logrus.Debugf("activateDeviceIfNeeded(%v)", info.Hash) 471 472 // Make sure deferred removal on device is canceled, if one was 473 // scheduled. 474 if err := devices.cancelDeferredRemoval(info); err != nil { 475 return fmt.Errorf("Deivce Deferred Removal Cancellation Failed: %s", err) 476 } 477 478 if devinfo, _ := devicemapper.GetInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 { 479 return nil 480 } 481 482 return devicemapper.ActivateDevice(devices.getPoolDevName(), info.Name(), info.DeviceID, info.Size) 483 } 484 485 func (devices *DeviceSet) createFilesystem(info *devInfo) error { 486 devname := info.DevName() 487 488 args := []string{} 489 for _, arg := range devices.mkfsArgs { 490 args = append(args, arg) 491 } 492 493 args = append(args, devname) 494 495 var err error 496 switch devices.filesystem { 497 case "xfs": 498 err = exec.Command("mkfs.xfs", args...).Run() 499 case "ext4": 500 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0,lazy_journal_init=0"}, args...)...).Run() 501 if err != nil { 502 err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0"}, args...)...).Run() 503 } 504 if err != nil { 505 return err 506 } 507 err = exec.Command("tune2fs", append([]string{"-c", "-1", "-i", "0"}, devname)...).Run() 508 default: 509 err = fmt.Errorf("Unsupported filesystem type %s", devices.filesystem) 510 } 511 if err != nil { 512 return err 513 } 514 515 return nil 516 } 517 518 func (devices *DeviceSet) migrateOldMetaData() error { 519 // Migrate old metadata file 520 jsonData, err := ioutil.ReadFile(devices.oldMetadataFile()) 521 if err != nil && !os.IsNotExist(err) { 522 return err 523 } 524 525 if jsonData != nil { 526 m := metaData{Devices: make(map[string]*devInfo)} 527 528 if err := json.Unmarshal(jsonData, &m); err != nil { 529 return err 530 } 531 532 for hash, info := range m.Devices { 533 info.Hash = hash 534 devices.saveMetadata(info) 535 } 536 if err := os.Rename(devices.oldMetadataFile(), devices.oldMetadataFile()+".migrated"); err != nil { 537 return err 538 } 539 540 } 541 542 return nil 543 } 544 545 func (devices *DeviceSet) initMetaData() error { 546 if err := devices.migrateOldMetaData(); err != nil { 547 return err 548 } 549 550 _, transactionID, _, _, _, _, err := devices.poolStatus() 551 if err != nil { 552 return err 553 } 554 555 devices.TransactionID = transactionID 556 557 if err := devices.constructDeviceIDMap(); err != nil { 558 return err 559 } 560 561 if err := devices.processPendingTransaction(); err != nil { 562 return err 563 } 564 return nil 565 } 566 567 func (devices *DeviceSet) incNextDeviceID() { 568 // IDs are 24bit, so wrap around 569 devices.NextDeviceID = (devices.NextDeviceID + 1) & maxDeviceID 570 } 571 572 func (devices *DeviceSet) getNextFreeDeviceID() (int, error) { 573 devices.incNextDeviceID() 574 for i := 0; i <= maxDeviceID; i++ { 575 if devices.isDeviceIDFree(devices.NextDeviceID) { 576 devices.markDeviceIDUsed(devices.NextDeviceID) 577 return devices.NextDeviceID, nil 578 } 579 devices.incNextDeviceID() 580 } 581 582 return 0, fmt.Errorf("Unable to find a free device ID") 583 } 584 585 func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) { 586 deviceID, err := devices.getNextFreeDeviceID() 587 if err != nil { 588 return nil, err 589 } 590 591 if err := devices.openTransaction(hash, deviceID); err != nil { 592 logrus.Debugf("Error opening transaction hash = %s deviceID = %d", hash, deviceID) 593 devices.markDeviceIDFree(deviceID) 594 return nil, err 595 } 596 597 for { 598 if err := devicemapper.CreateDevice(devices.getPoolDevName(), deviceID); err != nil { 599 if devicemapper.DeviceIDExists(err) { 600 // Device ID already exists. This should not 601 // happen. Now we have a mechianism to find 602 // a free device ID. So something is not right. 603 // Give a warning and continue. 604 logrus.Errorf("Device ID %d exists in pool but it is supposed to be unused", deviceID) 605 deviceID, err = devices.getNextFreeDeviceID() 606 if err != nil { 607 return nil, err 608 } 609 // Save new device id into transaction 610 devices.refreshTransaction(deviceID) 611 continue 612 } 613 logrus.Debugf("Error creating device: %s", err) 614 devices.markDeviceIDFree(deviceID) 615 return nil, err 616 } 617 break 618 } 619 620 logrus.Debugf("Registering device (id %v) with FS size %v", deviceID, devices.baseFsSize) 621 info, err := devices.registerDevice(deviceID, hash, devices.baseFsSize, devices.OpenTransactionID) 622 if err != nil { 623 _ = devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 624 devices.markDeviceIDFree(deviceID) 625 return nil, err 626 } 627 628 if err := devices.closeTransaction(); err != nil { 629 devices.unregisterDevice(deviceID, hash) 630 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 631 devices.markDeviceIDFree(deviceID) 632 return nil, err 633 } 634 return info, nil 635 } 636 637 func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo) error { 638 deviceID, err := devices.getNextFreeDeviceID() 639 if err != nil { 640 return err 641 } 642 643 if err := devices.openTransaction(hash, deviceID); err != nil { 644 logrus.Debugf("Error opening transaction hash = %s deviceID = %d", hash, deviceID) 645 devices.markDeviceIDFree(deviceID) 646 return err 647 } 648 649 for { 650 if err := devicemapper.CreateSnapDevice(devices.getPoolDevName(), deviceID, baseInfo.Name(), baseInfo.DeviceID); err != nil { 651 if devicemapper.DeviceIDExists(err) { 652 // Device ID already exists. This should not 653 // happen. Now we have a mechianism to find 654 // a free device ID. So something is not right. 655 // Give a warning and continue. 656 logrus.Errorf("Device ID %d exists in pool but it is supposed to be unused", deviceID) 657 deviceID, err = devices.getNextFreeDeviceID() 658 if err != nil { 659 return err 660 } 661 // Save new device id into transaction 662 devices.refreshTransaction(deviceID) 663 continue 664 } 665 logrus.Debugf("Error creating snap device: %s", err) 666 devices.markDeviceIDFree(deviceID) 667 return err 668 } 669 break 670 } 671 672 if _, err := devices.registerDevice(deviceID, hash, baseInfo.Size, devices.OpenTransactionID); err != nil { 673 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 674 devices.markDeviceIDFree(deviceID) 675 logrus.Debugf("Error registering device: %s", err) 676 return err 677 } 678 679 if err := devices.closeTransaction(); err != nil { 680 devices.unregisterDevice(deviceID, hash) 681 devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID) 682 devices.markDeviceIDFree(deviceID) 683 return err 684 } 685 return nil 686 } 687 688 func (devices *DeviceSet) loadMetadata(hash string) *devInfo { 689 info := &devInfo{Hash: hash, devices: devices} 690 691 jsonData, err := ioutil.ReadFile(devices.metadataFile(info)) 692 if err != nil { 693 return nil 694 } 695 696 if err := json.Unmarshal(jsonData, &info); err != nil { 697 return nil 698 } 699 700 return info 701 } 702 703 func getDeviceUUID(device string) (string, error) { 704 out, err := exec.Command("blkid", "-s", "UUID", "-o", "value", device).Output() 705 if err != nil { 706 logrus.Debugf("Failed to find uuid for device %s:%v", device, err) 707 return "", err 708 } 709 710 uuid := strings.TrimSuffix(string(out), "\n") 711 uuid = strings.TrimSpace(uuid) 712 logrus.Debugf("UUID for device: %s is:%s", device, uuid) 713 return uuid, nil 714 } 715 716 func (devices *DeviceSet) verifyBaseDeviceUUID(baseInfo *devInfo) error { 717 devices.Lock() 718 defer devices.Unlock() 719 720 if err := devices.activateDeviceIfNeeded(baseInfo); err != nil { 721 return err 722 } 723 724 defer devices.deactivateDevice(baseInfo) 725 726 uuid, err := getDeviceUUID(baseInfo.DevName()) 727 if err != nil { 728 return err 729 } 730 731 if devices.BaseDeviceUUID != uuid { 732 return fmt.Errorf("Current Base Device UUID:%s does not match with stored UUID:%s", uuid, devices.BaseDeviceUUID) 733 } 734 735 return nil 736 } 737 738 func (devices *DeviceSet) saveBaseDeviceUUID(baseInfo *devInfo) error { 739 devices.Lock() 740 defer devices.Unlock() 741 742 if err := devices.activateDeviceIfNeeded(baseInfo); err != nil { 743 return err 744 } 745 746 defer devices.deactivateDevice(baseInfo) 747 748 uuid, err := getDeviceUUID(baseInfo.DevName()) 749 if err != nil { 750 return err 751 } 752 753 devices.BaseDeviceUUID = uuid 754 devices.saveDeviceSetMetaData() 755 return nil 756 } 757 758 func (devices *DeviceSet) setupBaseImage() error { 759 oldInfo, _ := devices.lookupDevice("") 760 if oldInfo != nil && oldInfo.Initialized { 761 // If BaseDeviceUUID is nil (upgrade case), save it and 762 // return success. 763 if devices.BaseDeviceUUID == "" { 764 if err := devices.saveBaseDeviceUUID(oldInfo); err != nil { 765 return fmt.Errorf("Could not query and save base device UUID:%v", err) 766 } 767 return nil 768 } 769 770 if err := devices.verifyBaseDeviceUUID(oldInfo); err != nil { 771 return fmt.Errorf("Base Device UUID verification failed. Possibly using a different thin pool than last invocation:%v", err) 772 } 773 return nil 774 } 775 776 if oldInfo != nil && !oldInfo.Initialized { 777 logrus.Debugf("Removing uninitialized base image") 778 if err := devices.DeleteDevice(""); err != nil { 779 return err 780 } 781 } 782 783 if devices.thinPoolDevice != "" && oldInfo == nil { 784 _, transactionID, dataUsed, _, _, _, err := devices.poolStatus() 785 if err != nil { 786 return err 787 } 788 if dataUsed != 0 { 789 return fmt.Errorf("Unable to take ownership of thin-pool (%s) that already has used data blocks", 790 devices.thinPoolDevice) 791 } 792 if transactionID != 0 { 793 return fmt.Errorf("Unable to take ownership of thin-pool (%s) with non-zero transaction ID", 794 devices.thinPoolDevice) 795 } 796 } 797 798 logrus.Debugf("Initializing base device-mapper thin volume") 799 800 // Create initial device 801 info, err := devices.createRegisterDevice("") 802 if err != nil { 803 return err 804 } 805 806 logrus.Debugf("Creating filesystem on base device-mapper thin volume") 807 808 if err := devices.activateDeviceIfNeeded(info); err != nil { 809 return err 810 } 811 812 if err := devices.createFilesystem(info); err != nil { 813 return err 814 } 815 816 info.Initialized = true 817 if err := devices.saveMetadata(info); err != nil { 818 info.Initialized = false 819 return err 820 } 821 822 if err := devices.saveBaseDeviceUUID(info); err != nil { 823 return fmt.Errorf("Could not query and save base device UUID:%v", err) 824 } 825 826 return nil 827 } 828 829 func setCloseOnExec(name string) { 830 if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil { 831 for _, i := range fileInfos { 832 link, _ := os.Readlink(filepath.Join("/proc/self/fd", i.Name())) 833 if link == name { 834 fd, err := strconv.Atoi(i.Name()) 835 if err == nil { 836 syscall.CloseOnExec(fd) 837 } 838 } 839 } 840 } 841 } 842 843 // DMLog implements logging using DevMapperLogger interface. 844 func (devices *DeviceSet) DMLog(level int, file string, line int, dmError int, message string) { 845 // By default libdm sends us all the messages including debug ones. 846 // We need to filter out messages here and figure out which one 847 // should be printed. 848 if level > logLevel { 849 return 850 } 851 852 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 853 if level <= devicemapper.LogLevelErr { 854 logrus.Errorf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 855 } else if level <= devicemapper.LogLevelInfo { 856 logrus.Infof("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 857 } else { 858 // FIXME(vbatts) push this back into ./pkg/devicemapper/ 859 logrus.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message) 860 } 861 } 862 863 func major(device uint64) uint64 { 864 return (device >> 8) & 0xfff 865 } 866 867 func minor(device uint64) uint64 { 868 return (device & 0xff) | ((device >> 12) & 0xfff00) 869 } 870 871 // ResizePool increases the size of the pool. 872 func (devices *DeviceSet) ResizePool(size int64) error { 873 dirname := devices.loopbackDir() 874 datafilename := path.Join(dirname, "data") 875 if len(devices.dataDevice) > 0 { 876 datafilename = devices.dataDevice 877 } 878 metadatafilename := path.Join(dirname, "metadata") 879 if len(devices.metadataDevice) > 0 { 880 metadatafilename = devices.metadataDevice 881 } 882 883 datafile, err := os.OpenFile(datafilename, os.O_RDWR, 0) 884 if datafile == nil { 885 return err 886 } 887 defer datafile.Close() 888 889 fi, err := datafile.Stat() 890 if fi == nil { 891 return err 892 } 893 894 if fi.Size() > size { 895 return fmt.Errorf("Can't shrink file") 896 } 897 898 dataloopback := devicemapper.FindLoopDeviceFor(datafile) 899 if dataloopback == nil { 900 return fmt.Errorf("Unable to find loopback mount for: %s", datafilename) 901 } 902 defer dataloopback.Close() 903 904 metadatafile, err := os.OpenFile(metadatafilename, os.O_RDWR, 0) 905 if metadatafile == nil { 906 return err 907 } 908 defer metadatafile.Close() 909 910 metadataloopback := devicemapper.FindLoopDeviceFor(metadatafile) 911 if metadataloopback == nil { 912 return fmt.Errorf("Unable to find loopback mount for: %s", metadatafilename) 913 } 914 defer metadataloopback.Close() 915 916 // Grow loopback file 917 if err := datafile.Truncate(size); err != nil { 918 return fmt.Errorf("Unable to grow loopback file: %s", err) 919 } 920 921 // Reload size for loopback device 922 if err := devicemapper.LoopbackSetCapacity(dataloopback); err != nil { 923 return fmt.Errorf("Unable to update loopback capacity: %s", err) 924 } 925 926 // Suspend the pool 927 if err := devicemapper.SuspendDevice(devices.getPoolName()); err != nil { 928 return fmt.Errorf("Unable to suspend pool: %s", err) 929 } 930 931 // Reload with the new block sizes 932 if err := devicemapper.ReloadPool(devices.getPoolName(), dataloopback, metadataloopback, devices.thinpBlockSize); err != nil { 933 return fmt.Errorf("Unable to reload pool: %s", err) 934 } 935 936 // Resume the pool 937 if err := devicemapper.ResumeDevice(devices.getPoolName()); err != nil { 938 return fmt.Errorf("Unable to resume pool: %s", err) 939 } 940 941 return nil 942 } 943 944 func (devices *DeviceSet) loadTransactionMetaData() error { 945 jsonData, err := ioutil.ReadFile(devices.transactionMetaFile()) 946 if err != nil { 947 // There is no active transaction. This will be the case 948 // during upgrade. 949 if os.IsNotExist(err) { 950 devices.OpenTransactionID = devices.TransactionID 951 return nil 952 } 953 return err 954 } 955 956 json.Unmarshal(jsonData, &devices.transaction) 957 return nil 958 } 959 960 func (devices *DeviceSet) saveTransactionMetaData() error { 961 jsonData, err := json.Marshal(&devices.transaction) 962 if err != nil { 963 return fmt.Errorf("Error encoding metadata to json: %s", err) 964 } 965 966 return devices.writeMetaFile(jsonData, devices.transactionMetaFile()) 967 } 968 969 func (devices *DeviceSet) removeTransactionMetaData() error { 970 if err := os.RemoveAll(devices.transactionMetaFile()); err != nil { 971 return err 972 } 973 return nil 974 } 975 976 func (devices *DeviceSet) rollbackTransaction() error { 977 logrus.Debugf("Rolling back open transaction: TransactionID=%d hash=%s device_id=%d", devices.OpenTransactionID, devices.DeviceIDHash, devices.DeviceID) 978 979 // A device id might have already been deleted before transaction 980 // closed. In that case this call will fail. Just leave a message 981 // in case of failure. 982 if err := devicemapper.DeleteDevice(devices.getPoolDevName(), devices.DeviceID); err != nil { 983 logrus.Errorf("Unable to delete device: %s", err) 984 } 985 986 dinfo := &devInfo{Hash: devices.DeviceIDHash} 987 if err := devices.removeMetadata(dinfo); err != nil { 988 logrus.Errorf("Unable to remove metadata: %s", err) 989 } else { 990 devices.markDeviceIDFree(devices.DeviceID) 991 } 992 993 if err := devices.removeTransactionMetaData(); err != nil { 994 logrus.Errorf("Unable to remove transaction meta file %s: %s", devices.transactionMetaFile(), err) 995 } 996 997 return nil 998 } 999 1000 func (devices *DeviceSet) processPendingTransaction() error { 1001 if err := devices.loadTransactionMetaData(); err != nil { 1002 return err 1003 } 1004 1005 // If there was open transaction but pool transaction ID is same 1006 // as open transaction ID, nothing to roll back. 1007 if devices.TransactionID == devices.OpenTransactionID { 1008 return nil 1009 } 1010 1011 // If open transaction ID is less than pool transaction ID, something 1012 // is wrong. Bail out. 1013 if devices.OpenTransactionID < devices.TransactionID { 1014 logrus.Errorf("Open Transaction id %d is less than pool transaction id %d", devices.OpenTransactionID, devices.TransactionID) 1015 return nil 1016 } 1017 1018 // Pool transaction ID is not same as open transaction. There is 1019 // a transaction which was not completed. 1020 if err := devices.rollbackTransaction(); err != nil { 1021 return fmt.Errorf("Rolling back open transaction failed: %s", err) 1022 } 1023 1024 devices.OpenTransactionID = devices.TransactionID 1025 return nil 1026 } 1027 1028 func (devices *DeviceSet) loadDeviceSetMetaData() error { 1029 jsonData, err := ioutil.ReadFile(devices.deviceSetMetaFile()) 1030 if err != nil { 1031 // For backward compatibility return success if file does 1032 // not exist. 1033 if os.IsNotExist(err) { 1034 return nil 1035 } 1036 return err 1037 } 1038 1039 return json.Unmarshal(jsonData, devices) 1040 } 1041 1042 func (devices *DeviceSet) saveDeviceSetMetaData() error { 1043 jsonData, err := json.Marshal(devices) 1044 if err != nil { 1045 return fmt.Errorf("Error encoding metadata to json: %s", err) 1046 } 1047 1048 return devices.writeMetaFile(jsonData, devices.deviceSetMetaFile()) 1049 } 1050 1051 func (devices *DeviceSet) openTransaction(hash string, DeviceID int) error { 1052 devices.allocateTransactionID() 1053 devices.DeviceIDHash = hash 1054 devices.DeviceID = DeviceID 1055 if err := devices.saveTransactionMetaData(); err != nil { 1056 return fmt.Errorf("Error saving transaction metadata: %s", err) 1057 } 1058 return nil 1059 } 1060 1061 func (devices *DeviceSet) refreshTransaction(DeviceID int) error { 1062 devices.DeviceID = DeviceID 1063 if err := devices.saveTransactionMetaData(); err != nil { 1064 return fmt.Errorf("Error saving transaction metadata: %s", err) 1065 } 1066 return nil 1067 } 1068 1069 func (devices *DeviceSet) closeTransaction() error { 1070 if err := devices.updatePoolTransactionID(); err != nil { 1071 logrus.Debugf("Failed to close Transaction") 1072 return err 1073 } 1074 return nil 1075 } 1076 1077 func determineDriverCapabilities(version string) error { 1078 /* 1079 * Driver version 4.27.0 and greater support deferred activation 1080 * feature. 1081 */ 1082 1083 logrus.Debugf("devicemapper: driver version is %s", version) 1084 1085 versionSplit := strings.Split(version, ".") 1086 major, err := strconv.Atoi(versionSplit[0]) 1087 if err != nil { 1088 return graphdriver.ErrNotSupported 1089 } 1090 1091 if major > 4 { 1092 driverDeferredRemovalSupport = true 1093 return nil 1094 } 1095 1096 if major < 4 { 1097 return nil 1098 } 1099 1100 minor, err := strconv.Atoi(versionSplit[1]) 1101 if err != nil { 1102 return graphdriver.ErrNotSupported 1103 } 1104 1105 /* 1106 * If major is 4 and minor is 27, then there is no need to 1107 * check for patch level as it can not be less than 0. 1108 */ 1109 if minor >= 27 { 1110 driverDeferredRemovalSupport = true 1111 return nil 1112 } 1113 1114 return nil 1115 } 1116 1117 // Determine the major and minor number of loopback device 1118 func getDeviceMajorMinor(file *os.File) (uint64, uint64, error) { 1119 stat, err := file.Stat() 1120 if err != nil { 1121 return 0, 0, err 1122 } 1123 1124 dev := stat.Sys().(*syscall.Stat_t).Rdev 1125 majorNum := major(dev) 1126 minorNum := minor(dev) 1127 1128 logrus.Debugf("[devmapper]: Major:Minor for device: %s is:%v:%v", file.Name(), majorNum, minorNum) 1129 return majorNum, minorNum, nil 1130 } 1131 1132 // Given a file which is backing file of a loop back device, find the 1133 // loopback device name and its major/minor number. 1134 func getLoopFileDeviceMajMin(filename string) (string, uint64, uint64, error) { 1135 file, err := os.Open(filename) 1136 if err != nil { 1137 logrus.Debugf("[devmapper]: Failed to open file %s", filename) 1138 return "", 0, 0, err 1139 } 1140 1141 defer file.Close() 1142 loopbackDevice := devicemapper.FindLoopDeviceFor(file) 1143 if loopbackDevice == nil { 1144 return "", 0, 0, fmt.Errorf("[devmapper]: Unable to find loopback mount for: %s", filename) 1145 } 1146 defer loopbackDevice.Close() 1147 1148 Major, Minor, err := getDeviceMajorMinor(loopbackDevice) 1149 if err != nil { 1150 return "", 0, 0, err 1151 } 1152 return loopbackDevice.Name(), Major, Minor, nil 1153 } 1154 1155 // Get the major/minor numbers of thin pool data and metadata devices 1156 func (devices *DeviceSet) getThinPoolDataMetaMajMin() (uint64, uint64, uint64, uint64, error) { 1157 var params, poolDataMajMin, poolMetadataMajMin string 1158 1159 _, _, _, params, err := devicemapper.GetTable(devices.getPoolName()) 1160 if err != nil { 1161 return 0, 0, 0, 0, err 1162 } 1163 1164 if _, err = fmt.Sscanf(params, "%s %s", &poolMetadataMajMin, &poolDataMajMin); err != nil { 1165 return 0, 0, 0, 0, err 1166 } 1167 1168 logrus.Debugf("[devmapper]: poolDataMajMin=%s poolMetaMajMin=%s\n", poolDataMajMin, poolMetadataMajMin) 1169 1170 poolDataMajMinorSplit := strings.Split(poolDataMajMin, ":") 1171 poolDataMajor, err := strconv.ParseUint(poolDataMajMinorSplit[0], 10, 32) 1172 if err != nil { 1173 return 0, 0, 0, 0, err 1174 } 1175 1176 poolDataMinor, err := strconv.ParseUint(poolDataMajMinorSplit[1], 10, 32) 1177 if err != nil { 1178 return 0, 0, 0, 0, err 1179 } 1180 1181 poolMetadataMajMinorSplit := strings.Split(poolMetadataMajMin, ":") 1182 poolMetadataMajor, err := strconv.ParseUint(poolMetadataMajMinorSplit[0], 10, 32) 1183 if err != nil { 1184 return 0, 0, 0, 0, err 1185 } 1186 1187 poolMetadataMinor, err := strconv.ParseUint(poolMetadataMajMinorSplit[1], 10, 32) 1188 if err != nil { 1189 return 0, 0, 0, 0, err 1190 } 1191 1192 return poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, nil 1193 } 1194 1195 func (devices *DeviceSet) loadThinPoolLoopBackInfo() error { 1196 poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, err := devices.getThinPoolDataMetaMajMin() 1197 if err != nil { 1198 return err 1199 } 1200 1201 dirname := devices.loopbackDir() 1202 1203 // data device has not been passed in. So there should be a data file 1204 // which is being mounted as loop device. 1205 if devices.dataDevice == "" { 1206 datafilename := path.Join(dirname, "data") 1207 dataLoopDevice, dataMajor, dataMinor, err := getLoopFileDeviceMajMin(datafilename) 1208 if err != nil { 1209 return err 1210 } 1211 1212 // Compare the two 1213 if poolDataMajor == dataMajor && poolDataMinor == dataMinor { 1214 devices.dataDevice = dataLoopDevice 1215 devices.dataLoopFile = datafilename 1216 } 1217 1218 } 1219 1220 // metadata device has not been passed in. So there should be a 1221 // metadata file which is being mounted as loop device. 1222 if devices.metadataDevice == "" { 1223 metadatafilename := path.Join(dirname, "metadata") 1224 metadataLoopDevice, metadataMajor, metadataMinor, err := getLoopFileDeviceMajMin(metadatafilename) 1225 if err != nil { 1226 return err 1227 } 1228 if poolMetadataMajor == metadataMajor && poolMetadataMinor == metadataMinor { 1229 devices.metadataDevice = metadataLoopDevice 1230 devices.metadataLoopFile = metadatafilename 1231 } 1232 } 1233 1234 return nil 1235 } 1236 1237 func (devices *DeviceSet) initDevmapper(doInit bool) error { 1238 // give ourselves to libdm as a log handler 1239 devicemapper.LogInit(devices) 1240 1241 version, err := devicemapper.GetDriverVersion() 1242 if err != nil { 1243 // Can't even get driver version, assume not supported 1244 return graphdriver.ErrNotSupported 1245 } 1246 1247 if err := determineDriverCapabilities(version); err != nil { 1248 return graphdriver.ErrNotSupported 1249 } 1250 1251 // If user asked for deferred removal and both library and driver 1252 // supports deferred removal use it. 1253 if enableDeferredRemoval && driverDeferredRemovalSupport && devicemapper.LibraryDeferredRemovalSupport == true { 1254 logrus.Debugf("devmapper: Deferred removal support enabled.") 1255 devices.deferredRemove = true 1256 } 1257 1258 // https://github.com/docker/docker/issues/4036 1259 if supported := devicemapper.UdevSetSyncSupport(true); !supported { 1260 logrus.Warn("Udev sync is not supported. This will lead to unexpected behavior, data loss and errors. For more information, see https://docs.docker.com/reference/commandline/daemon/#daemon-storage-driver-option") 1261 } 1262 1263 if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil { 1264 return err 1265 } 1266 1267 // Set the device prefix from the device id and inode of the docker root dir 1268 1269 st, err := os.Stat(devices.root) 1270 if err != nil { 1271 return fmt.Errorf("Error looking up dir %s: %s", devices.root, err) 1272 } 1273 sysSt := st.Sys().(*syscall.Stat_t) 1274 // "reg-" stands for "regular file". 1275 // In the future we might use "dev-" for "device file", etc. 1276 // docker-maj,min[-inode] stands for: 1277 // - Managed by docker 1278 // - The target of this device is at major <maj> and minor <min> 1279 // - If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself. 1280 devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino) 1281 logrus.Debugf("Generated prefix: %s", devices.devicePrefix) 1282 1283 // Check for the existence of the thin-pool device 1284 logrus.Debugf("Checking for existence of the pool '%s'", devices.getPoolName()) 1285 info, err := devicemapper.GetInfo(devices.getPoolName()) 1286 if info == nil { 1287 logrus.Debugf("Error device devicemapper.GetInfo: %s", err) 1288 return err 1289 } 1290 1291 // It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files 1292 // that are not Close-on-exec, and lxc-start will die if it inherits any unexpected files, 1293 // so we add this badhack to make sure it closes itself 1294 setCloseOnExec("/dev/mapper/control") 1295 1296 // Make sure the sparse images exist in <root>/devicemapper/data and 1297 // <root>/devicemapper/metadata 1298 1299 createdLoopback := false 1300 1301 // If the pool doesn't exist, create it 1302 if info.Exists == 0 && devices.thinPoolDevice == "" { 1303 logrus.Debugf("Pool doesn't exist. Creating it.") 1304 1305 var ( 1306 dataFile *os.File 1307 metadataFile *os.File 1308 ) 1309 1310 if devices.dataDevice == "" { 1311 // Make sure the sparse images exist in <root>/devicemapper/data 1312 1313 hasData := devices.hasImage("data") 1314 1315 if !doInit && !hasData { 1316 return errors.New("Loopback data file not found") 1317 } 1318 1319 if !hasData { 1320 createdLoopback = true 1321 } 1322 1323 data, err := devices.ensureImage("data", devices.dataLoopbackSize) 1324 if err != nil { 1325 logrus.Debugf("Error device ensureImage (data): %s", err) 1326 return err 1327 } 1328 1329 dataFile, err = devicemapper.AttachLoopDevice(data) 1330 if err != nil { 1331 return err 1332 } 1333 devices.dataLoopFile = data 1334 devices.dataDevice = dataFile.Name() 1335 } else { 1336 dataFile, err = os.OpenFile(devices.dataDevice, os.O_RDWR, 0600) 1337 if err != nil { 1338 return err 1339 } 1340 } 1341 defer dataFile.Close() 1342 1343 if devices.metadataDevice == "" { 1344 // Make sure the sparse images exist in <root>/devicemapper/metadata 1345 1346 hasMetadata := devices.hasImage("metadata") 1347 1348 if !doInit && !hasMetadata { 1349 return errors.New("Loopback metadata file not found") 1350 } 1351 1352 if !hasMetadata { 1353 createdLoopback = true 1354 } 1355 1356 metadata, err := devices.ensureImage("metadata", devices.metaDataLoopbackSize) 1357 if err != nil { 1358 logrus.Debugf("Error device ensureImage (metadata): %s", err) 1359 return err 1360 } 1361 1362 metadataFile, err = devicemapper.AttachLoopDevice(metadata) 1363 if err != nil { 1364 return err 1365 } 1366 devices.metadataLoopFile = metadata 1367 devices.metadataDevice = metadataFile.Name() 1368 } else { 1369 metadataFile, err = os.OpenFile(devices.metadataDevice, os.O_RDWR, 0600) 1370 if err != nil { 1371 return err 1372 } 1373 } 1374 defer metadataFile.Close() 1375 1376 if err := devicemapper.CreatePool(devices.getPoolName(), dataFile, metadataFile, devices.thinpBlockSize); err != nil { 1377 return err 1378 } 1379 } 1380 1381 // Pool already exists and caller did not pass us a pool. That means 1382 // we probably created pool earlier and could not remove it as some 1383 // containers were still using it. Detect some of the properties of 1384 // pool, like is it using loop devices. 1385 if info.Exists != 0 && devices.thinPoolDevice == "" { 1386 if err := devices.loadThinPoolLoopBackInfo(); err != nil { 1387 logrus.Debugf("Failed to load thin pool loopback device information:%v", err) 1388 return err 1389 } 1390 } 1391 1392 // If we didn't just create the data or metadata image, we need to 1393 // load the transaction id and migrate old metadata 1394 if !createdLoopback { 1395 if err := devices.initMetaData(); err != nil { 1396 return err 1397 } 1398 } 1399 1400 if devices.thinPoolDevice == "" { 1401 if devices.metadataLoopFile != "" || devices.dataLoopFile != "" { 1402 logrus.Warnf("Usage of loopback devices is strongly discouraged for production use. Please use `--storage-opt dm.thinpooldev` or use `man docker` to refer to dm.thinpooldev section.") 1403 } 1404 } 1405 1406 // Right now this loads only NextDeviceID. If there is more metadata 1407 // down the line, we might have to move it earlier. 1408 if err := devices.loadDeviceSetMetaData(); err != nil { 1409 return err 1410 } 1411 1412 // Setup the base image 1413 if doInit { 1414 if err := devices.setupBaseImage(); err != nil { 1415 logrus.Debugf("Error device setupBaseImage: %s", err) 1416 return err 1417 } 1418 } 1419 1420 return nil 1421 } 1422 1423 // AddDevice adds a device and registers in the hash. 1424 func (devices *DeviceSet) AddDevice(hash, baseHash string) error { 1425 logrus.Debugf("[deviceset] AddDevice(hash=%s basehash=%s)", hash, baseHash) 1426 defer logrus.Debugf("[deviceset] AddDevice(hash=%s basehash=%s) END", hash, baseHash) 1427 1428 baseInfo, err := devices.lookupDevice(baseHash) 1429 if err != nil { 1430 return err 1431 } 1432 1433 baseInfo.lock.Lock() 1434 defer baseInfo.lock.Unlock() 1435 1436 devices.Lock() 1437 defer devices.Unlock() 1438 1439 if info, _ := devices.lookupDevice(hash); info != nil { 1440 return fmt.Errorf("device %s already exists", hash) 1441 } 1442 1443 if err := devices.createRegisterSnapDevice(hash, baseInfo); err != nil { 1444 return err 1445 } 1446 1447 return nil 1448 } 1449 1450 func (devices *DeviceSet) deleteDevice(info *devInfo) error { 1451 if devices.doBlkDiscard { 1452 // This is a workaround for the kernel not discarding block so 1453 // on the thin pool when we remove a thinp device, so we do it 1454 // manually 1455 if err := devices.activateDeviceIfNeeded(info); err == nil { 1456 if err := devicemapper.BlockDeviceDiscard(info.DevName()); err != nil { 1457 logrus.Debugf("Error discarding block on device: %s (ignoring)", err) 1458 } 1459 } 1460 } 1461 1462 devinfo, _ := devicemapper.GetInfo(info.Name()) 1463 if devinfo != nil && devinfo.Exists != 0 { 1464 if err := devices.removeDevice(info.Name()); err != nil { 1465 logrus.Debugf("Error removing device: %s", err) 1466 return err 1467 } 1468 } 1469 1470 if err := devices.openTransaction(info.Hash, info.DeviceID); err != nil { 1471 logrus.Debugf("Error opening transaction hash = %s deviceID = %d", "", info.DeviceID) 1472 return err 1473 } 1474 1475 if err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID); err != nil { 1476 logrus.Debugf("Error deleting device: %s", err) 1477 return err 1478 } 1479 1480 if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil { 1481 return err 1482 } 1483 1484 if err := devices.closeTransaction(); err != nil { 1485 return err 1486 } 1487 1488 devices.markDeviceIDFree(info.DeviceID) 1489 1490 return nil 1491 } 1492 1493 // DeleteDevice deletes a device from the hash. 1494 func (devices *DeviceSet) DeleteDevice(hash string) error { 1495 info, err := devices.lookupDevice(hash) 1496 if err != nil { 1497 return err 1498 } 1499 1500 info.lock.Lock() 1501 defer info.lock.Unlock() 1502 1503 devices.Lock() 1504 defer devices.Unlock() 1505 1506 return devices.deleteDevice(info) 1507 } 1508 1509 func (devices *DeviceSet) deactivatePool() error { 1510 logrus.Debugf("[devmapper] deactivatePool()") 1511 defer logrus.Debugf("[devmapper] deactivatePool END") 1512 devname := devices.getPoolDevName() 1513 1514 devinfo, err := devicemapper.GetInfo(devname) 1515 if err != nil { 1516 return err 1517 } 1518 1519 if devinfo.Exists == 0 { 1520 return nil 1521 } 1522 if err := devicemapper.RemoveDevice(devname); err != nil { 1523 return err 1524 } 1525 1526 if d, err := devicemapper.GetDeps(devname); err == nil { 1527 logrus.Warnf("[devmapper] device %s still has %d active dependents", devname, d.Count) 1528 } 1529 1530 return nil 1531 } 1532 1533 func (devices *DeviceSet) deactivateDevice(info *devInfo) error { 1534 logrus.Debugf("[devmapper] deactivateDevice(%s)", info.Hash) 1535 defer logrus.Debugf("[devmapper] deactivateDevice END(%s)", info.Hash) 1536 1537 devinfo, err := devicemapper.GetInfo(info.Name()) 1538 if err != nil { 1539 return err 1540 } 1541 1542 if devinfo.Exists == 0 { 1543 return nil 1544 } 1545 1546 if devices.deferredRemove { 1547 if err := devicemapper.RemoveDeviceDeferred(info.Name()); err != nil { 1548 return err 1549 } 1550 } else { 1551 if err := devices.removeDevice(info.Name()); err != nil { 1552 return err 1553 } 1554 } 1555 return nil 1556 } 1557 1558 // Issues the underlying dm remove operation. 1559 func (devices *DeviceSet) removeDevice(devname string) error { 1560 var err error 1561 1562 logrus.Debugf("[devmapper] removeDevice START(%s)", devname) 1563 defer logrus.Debugf("[devmapper] removeDevice END(%s)", devname) 1564 1565 for i := 0; i < 200; i++ { 1566 err = devicemapper.RemoveDevice(devname) 1567 if err == nil { 1568 break 1569 } 1570 if err != devicemapper.ErrBusy { 1571 return err 1572 } 1573 1574 // If we see EBUSY it may be a transient error, 1575 // sleep a bit a retry a few times. 1576 devices.Unlock() 1577 time.Sleep(100 * time.Millisecond) 1578 devices.Lock() 1579 } 1580 1581 return err 1582 } 1583 1584 func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error { 1585 if !devices.deferredRemove { 1586 return nil 1587 } 1588 1589 logrus.Debugf("[devmapper] cancelDeferredRemoval START(%s)", info.Name()) 1590 defer logrus.Debugf("[devmapper] cancelDeferredRemoval END(%s)", info.Name()) 1591 1592 devinfo, err := devicemapper.GetInfoWithDeferred(info.Name()) 1593 1594 if devinfo != nil && devinfo.DeferredRemove == 0 { 1595 return nil 1596 } 1597 1598 // Cancel deferred remove 1599 for i := 0; i < 100; i++ { 1600 err = devicemapper.CancelDeferredRemove(info.Name()) 1601 if err == nil { 1602 break 1603 } 1604 1605 if err == devicemapper.ErrEnxio { 1606 // Device is probably already gone. Return success. 1607 return nil 1608 } 1609 1610 if err != devicemapper.ErrBusy { 1611 return err 1612 } 1613 1614 // If we see EBUSY it may be a transient error, 1615 // sleep a bit a retry a few times. 1616 devices.Unlock() 1617 time.Sleep(100 * time.Millisecond) 1618 devices.Lock() 1619 } 1620 return err 1621 } 1622 1623 // Shutdown shuts down the device by unmounting the root. 1624 func (devices *DeviceSet) Shutdown() error { 1625 logrus.Debugf("[deviceset %s] Shutdown()", devices.devicePrefix) 1626 logrus.Debugf("[devmapper] Shutting down DeviceSet: %s", devices.root) 1627 defer logrus.Debugf("[deviceset %s] Shutdown() END", devices.devicePrefix) 1628 1629 var devs []*devInfo 1630 1631 devices.devicesLock.Lock() 1632 for _, info := range devices.Devices { 1633 devs = append(devs, info) 1634 } 1635 devices.devicesLock.Unlock() 1636 1637 for _, info := range devs { 1638 info.lock.Lock() 1639 if info.mountCount > 0 { 1640 // We use MNT_DETACH here in case it is still busy in some running 1641 // container. This means it'll go away from the global scope directly, 1642 // and the device will be released when that container dies. 1643 if err := syscall.Unmount(info.mountPath, syscall.MNT_DETACH); err != nil { 1644 logrus.Debugf("Shutdown unmounting %s, error: %s", info.mountPath, err) 1645 } 1646 1647 devices.Lock() 1648 if err := devices.deactivateDevice(info); err != nil { 1649 logrus.Debugf("Shutdown deactivate %s , error: %s", info.Hash, err) 1650 } 1651 devices.Unlock() 1652 } 1653 info.lock.Unlock() 1654 } 1655 1656 info, _ := devices.lookupDevice("") 1657 if info != nil { 1658 info.lock.Lock() 1659 devices.Lock() 1660 if err := devices.deactivateDevice(info); err != nil { 1661 logrus.Debugf("Shutdown deactivate base , error: %s", err) 1662 } 1663 devices.Unlock() 1664 info.lock.Unlock() 1665 } 1666 1667 devices.Lock() 1668 if devices.thinPoolDevice == "" { 1669 if err := devices.deactivatePool(); err != nil { 1670 logrus.Debugf("Shutdown deactivate pool , error: %s", err) 1671 } 1672 } 1673 1674 devices.saveDeviceSetMetaData() 1675 devices.Unlock() 1676 1677 return nil 1678 } 1679 1680 // MountDevice mounts the device if not already mounted. 1681 func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error { 1682 info, err := devices.lookupDevice(hash) 1683 if err != nil { 1684 return err 1685 } 1686 1687 info.lock.Lock() 1688 defer info.lock.Unlock() 1689 1690 devices.Lock() 1691 defer devices.Unlock() 1692 1693 if info.mountCount > 0 { 1694 if path != info.mountPath { 1695 return fmt.Errorf("Trying to mount devmapper device in multiple places (%s, %s)", info.mountPath, path) 1696 } 1697 1698 info.mountCount++ 1699 return nil 1700 } 1701 1702 if err := devices.activateDeviceIfNeeded(info); err != nil { 1703 return fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err) 1704 } 1705 1706 fstype, err := ProbeFsType(info.DevName()) 1707 if err != nil { 1708 return err 1709 } 1710 1711 options := "" 1712 1713 if fstype == "xfs" { 1714 // XFS needs nouuid or it can't mount filesystems with the same fs 1715 options = joinMountOptions(options, "nouuid") 1716 } 1717 1718 options = joinMountOptions(options, devices.mountOptions) 1719 options = joinMountOptions(options, label.FormatMountLabel("", mountLabel)) 1720 1721 if err := syscall.Mount(info.DevName(), path, fstype, syscall.MS_MGC_VAL, options); err != nil { 1722 return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), path, err) 1723 } 1724 1725 info.mountCount = 1 1726 info.mountPath = path 1727 1728 return nil 1729 } 1730 1731 // UnmountDevice unmounts the device and removes it from hash. 1732 func (devices *DeviceSet) UnmountDevice(hash string) error { 1733 logrus.Debugf("[devmapper] UnmountDevice(hash=%s)", hash) 1734 defer logrus.Debugf("[devmapper] UnmountDevice(hash=%s) END", hash) 1735 1736 info, err := devices.lookupDevice(hash) 1737 if err != nil { 1738 return err 1739 } 1740 1741 info.lock.Lock() 1742 defer info.lock.Unlock() 1743 1744 devices.Lock() 1745 defer devices.Unlock() 1746 1747 if info.mountCount == 0 { 1748 return fmt.Errorf("UnmountDevice: device not-mounted id %s", hash) 1749 } 1750 1751 info.mountCount-- 1752 if info.mountCount > 0 { 1753 return nil 1754 } 1755 1756 logrus.Debugf("[devmapper] Unmount(%s)", info.mountPath) 1757 if err := syscall.Unmount(info.mountPath, syscall.MNT_DETACH); err != nil { 1758 return err 1759 } 1760 logrus.Debugf("[devmapper] Unmount done") 1761 1762 if err := devices.deactivateDevice(info); err != nil { 1763 return err 1764 } 1765 1766 info.mountPath = "" 1767 1768 return nil 1769 } 1770 1771 // HasDevice returns true if the device is in the hash and mounted. 1772 func (devices *DeviceSet) HasDevice(hash string) bool { 1773 devices.Lock() 1774 defer devices.Unlock() 1775 1776 info, _ := devices.lookupDevice(hash) 1777 return info != nil 1778 } 1779 1780 // HasActivatedDevice return true if the device exists. 1781 func (devices *DeviceSet) HasActivatedDevice(hash string) bool { 1782 info, _ := devices.lookupDevice(hash) 1783 if info == nil { 1784 return false 1785 } 1786 1787 info.lock.Lock() 1788 defer info.lock.Unlock() 1789 1790 devices.Lock() 1791 defer devices.Unlock() 1792 1793 devinfo, _ := devicemapper.GetInfo(info.Name()) 1794 return devinfo != nil && devinfo.Exists != 0 1795 } 1796 1797 // List returns a list of device ids. 1798 func (devices *DeviceSet) List() []string { 1799 devices.Lock() 1800 defer devices.Unlock() 1801 1802 devices.devicesLock.Lock() 1803 ids := make([]string, len(devices.Devices)) 1804 i := 0 1805 for k := range devices.Devices { 1806 ids[i] = k 1807 i++ 1808 } 1809 devices.devicesLock.Unlock() 1810 1811 return ids 1812 } 1813 1814 func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) { 1815 var params string 1816 _, sizeInSectors, _, params, err = devicemapper.GetStatus(devName) 1817 if err != nil { 1818 return 1819 } 1820 if _, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector); err == nil { 1821 return 1822 } 1823 return 1824 } 1825 1826 // GetDeviceStatus provides size, mapped sectors 1827 func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) { 1828 info, err := devices.lookupDevice(hash) 1829 if err != nil { 1830 return nil, err 1831 } 1832 1833 info.lock.Lock() 1834 defer info.lock.Unlock() 1835 1836 devices.Lock() 1837 defer devices.Unlock() 1838 1839 status := &DevStatus{ 1840 DeviceID: info.DeviceID, 1841 Size: info.Size, 1842 TransactionID: info.TransactionID, 1843 } 1844 1845 if err := devices.activateDeviceIfNeeded(info); err != nil { 1846 return nil, fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err) 1847 } 1848 1849 sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName()) 1850 1851 if err != nil { 1852 return nil, err 1853 } 1854 1855 status.SizeInSectors = sizeInSectors 1856 status.MappedSectors = mappedSectors 1857 status.HighestMappedSector = highestMappedSector 1858 1859 return status, nil 1860 } 1861 1862 func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionID, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) { 1863 var params string 1864 if _, totalSizeInSectors, _, params, err = devicemapper.GetStatus(devices.getPoolName()); err == nil { 1865 _, err = fmt.Sscanf(params, "%d %d/%d %d/%d", &transactionID, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal) 1866 } 1867 return 1868 } 1869 1870 // DataDevicePath returns the path to the data storage for this deviceset, 1871 // regardless of loopback or block device 1872 func (devices *DeviceSet) DataDevicePath() string { 1873 return devices.dataDevice 1874 } 1875 1876 // MetadataDevicePath returns the path to the metadata storage for this deviceset, 1877 // regardless of loopback or block device 1878 func (devices *DeviceSet) MetadataDevicePath() string { 1879 return devices.metadataDevice 1880 } 1881 1882 func (devices *DeviceSet) getUnderlyingAvailableSpace(loopFile string) (uint64, error) { 1883 buf := new(syscall.Statfs_t) 1884 if err := syscall.Statfs(loopFile, buf); err != nil { 1885 logrus.Warnf("Couldn't stat loopfile filesystem %v: %v", loopFile, err) 1886 return 0, err 1887 } 1888 return buf.Bfree * uint64(buf.Bsize), nil 1889 } 1890 1891 func (devices *DeviceSet) isRealFile(loopFile string) (bool, error) { 1892 if loopFile != "" { 1893 fi, err := os.Stat(loopFile) 1894 if err != nil { 1895 logrus.Warnf("Couldn't stat loopfile %v: %v", loopFile, err) 1896 return false, err 1897 } 1898 return fi.Mode().IsRegular(), nil 1899 } 1900 return false, nil 1901 } 1902 1903 // Status returns the current status of this deviceset 1904 func (devices *DeviceSet) Status() *Status { 1905 devices.Lock() 1906 defer devices.Unlock() 1907 1908 status := &Status{} 1909 1910 status.PoolName = devices.getPoolName() 1911 status.DataFile = devices.DataDevicePath() 1912 status.DataLoopback = devices.dataLoopFile 1913 status.MetadataFile = devices.MetadataDevicePath() 1914 status.MetadataLoopback = devices.metadataLoopFile 1915 status.UdevSyncSupported = devicemapper.UdevSyncSupported() 1916 status.DeferredRemoveEnabled = devices.deferredRemove 1917 1918 totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus() 1919 if err == nil { 1920 // Convert from blocks to bytes 1921 blockSizeInSectors := totalSizeInSectors / dataTotal 1922 1923 status.Data.Used = dataUsed * blockSizeInSectors * 512 1924 status.Data.Total = dataTotal * blockSizeInSectors * 512 1925 status.Data.Available = status.Data.Total - status.Data.Used 1926 1927 // metadata blocks are always 4k 1928 status.Metadata.Used = metadataUsed * 4096 1929 status.Metadata.Total = metadataTotal * 4096 1930 status.Metadata.Available = status.Metadata.Total - status.Metadata.Used 1931 1932 status.SectorSize = blockSizeInSectors * 512 1933 1934 if check, _ := devices.isRealFile(devices.dataLoopFile); check { 1935 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.dataLoopFile) 1936 if err == nil && actualSpace < status.Data.Available { 1937 status.Data.Available = actualSpace 1938 } 1939 } 1940 1941 if check, _ := devices.isRealFile(devices.metadataLoopFile); check { 1942 actualSpace, err := devices.getUnderlyingAvailableSpace(devices.metadataLoopFile) 1943 if err == nil && actualSpace < status.Metadata.Available { 1944 status.Metadata.Available = actualSpace 1945 } 1946 } 1947 } 1948 1949 return status 1950 } 1951 1952 // Status returns the current status of this deviceset 1953 func (devices *DeviceSet) exportDeviceMetadata(hash string) (*deviceMetadata, error) { 1954 info, err := devices.lookupDevice(hash) 1955 if err != nil { 1956 return nil, err 1957 } 1958 1959 info.lock.Lock() 1960 defer info.lock.Unlock() 1961 1962 metadata := &deviceMetadata{info.DeviceID, info.Size, info.Name()} 1963 return metadata, nil 1964 } 1965 1966 // NewDeviceSet creates the device set based on the options provided. 1967 func NewDeviceSet(root string, doInit bool, options []string) (*DeviceSet, error) { 1968 devicemapper.SetDevDir("/dev") 1969 1970 devices := &DeviceSet{ 1971 root: root, 1972 metaData: metaData{Devices: make(map[string]*devInfo)}, 1973 dataLoopbackSize: defaultDataLoopbackSize, 1974 metaDataLoopbackSize: defaultMetaDataLoopbackSize, 1975 baseFsSize: defaultBaseFsSize, 1976 overrideUdevSyncCheck: defaultUdevSyncOverride, 1977 filesystem: "ext4", 1978 doBlkDiscard: true, 1979 thinpBlockSize: defaultThinpBlockSize, 1980 deviceIDMap: make([]byte, deviceIDMapSz), 1981 } 1982 1983 foundBlkDiscard := false 1984 for _, option := range options { 1985 key, val, err := parsers.ParseKeyValueOpt(option) 1986 if err != nil { 1987 return nil, err 1988 } 1989 key = strings.ToLower(key) 1990 switch key { 1991 case "dm.basesize": 1992 size, err := units.RAMInBytes(val) 1993 if err != nil { 1994 return nil, err 1995 } 1996 devices.baseFsSize = uint64(size) 1997 case "dm.loopdatasize": 1998 size, err := units.RAMInBytes(val) 1999 if err != nil { 2000 return nil, err 2001 } 2002 devices.dataLoopbackSize = size 2003 case "dm.loopmetadatasize": 2004 size, err := units.RAMInBytes(val) 2005 if err != nil { 2006 return nil, err 2007 } 2008 devices.metaDataLoopbackSize = size 2009 case "dm.fs": 2010 if val != "ext4" && val != "xfs" { 2011 return nil, fmt.Errorf("Unsupported filesystem %s\n", val) 2012 } 2013 devices.filesystem = val 2014 case "dm.mkfsarg": 2015 devices.mkfsArgs = append(devices.mkfsArgs, val) 2016 case "dm.mountopt": 2017 devices.mountOptions = joinMountOptions(devices.mountOptions, val) 2018 case "dm.metadatadev": 2019 devices.metadataDevice = val 2020 case "dm.datadev": 2021 devices.dataDevice = val 2022 case "dm.thinpooldev": 2023 devices.thinPoolDevice = strings.TrimPrefix(val, "/dev/mapper/") 2024 case "dm.blkdiscard": 2025 foundBlkDiscard = true 2026 devices.doBlkDiscard, err = strconv.ParseBool(val) 2027 if err != nil { 2028 return nil, err 2029 } 2030 case "dm.blocksize": 2031 size, err := units.RAMInBytes(val) 2032 if err != nil { 2033 return nil, err 2034 } 2035 // convert to 512b sectors 2036 devices.thinpBlockSize = uint32(size) >> 9 2037 case "dm.override_udev_sync_check": 2038 devices.overrideUdevSyncCheck, err = strconv.ParseBool(val) 2039 if err != nil { 2040 return nil, err 2041 } 2042 2043 case "dm.use_deferred_removal": 2044 enableDeferredRemoval, err = strconv.ParseBool(val) 2045 if err != nil { 2046 return nil, err 2047 } 2048 2049 default: 2050 return nil, fmt.Errorf("Unknown option %s\n", key) 2051 } 2052 } 2053 2054 // By default, don't do blk discard hack on raw devices, its rarely useful and is expensive 2055 if !foundBlkDiscard && (devices.dataDevice != "" || devices.thinPoolDevice != "") { 2056 devices.doBlkDiscard = false 2057 } 2058 2059 if err := devices.initDevmapper(doInit); err != nil { 2060 return nil, err 2061 } 2062 2063 return devices, nil 2064 }