github.com/Cloud-Foundations/Dominator@v0.3.4/hypervisor/manager/volumes.go (about)

     1  package manager
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"os"
     8  	"os/exec"
     9  	"path/filepath"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"syscall"
    15  	"time"
    16  
    17  	"github.com/Cloud-Foundations/Dominator/lib/format"
    18  	"github.com/Cloud-Foundations/Dominator/lib/fsutil"
    19  	"github.com/Cloud-Foundations/Dominator/lib/fsutil/mounts"
    20  	"github.com/Cloud-Foundations/Dominator/lib/log"
    21  	"github.com/Cloud-Foundations/Dominator/lib/mbr"
    22  	"github.com/Cloud-Foundations/Dominator/lib/wsyscall"
    23  	proto "github.com/Cloud-Foundations/Dominator/proto/hypervisor"
    24  )
    25  
     26  const (
     27  	sysClassBlock = "/sys/class/block" // sysfs directory whose per-device entries ("slaves", "partition") are consulted below
     28  )
    29  
     30  var (
     31  	memoryVolumeDirectory      string // directory for memory volumes; empty until getMemoryVolumeDirectory succeeds
     32  	memoryVolumeDirectoryMutex sync.Mutex // held by getMemoryVolumeDirectory while it reads/sets memoryVolumeDirectory
     33  )
    34  
     35  type mountInfo struct { // mountInfo pairs a mount entry with the size of its file-system
     36  	mountEntry *mounts.MountEntry // the mount table entry
     37  	size       uint64 // file-system size in bytes (statfs blocks * block size)
     38  }
    39  
    40  // check2fs returns true if the device hosts an ext{2,3,4} file-system.
    41  func check2fs(device string) bool {
    42  	cmd := exec.Command("e2label", device)
    43  	return cmd.Run() == nil
    44  }
    45  
    46  func checkTrim(mountEntry *mounts.MountEntry) bool {
    47  	for _, option := range strings.Split(mountEntry.Options, ",") {
    48  		if option == "discard" {
    49  			return true
    50  		}
    51  	}
    52  	return false
    53  }
    54  
    55  func demapDevice(device string) (string, error) {
    56  	sysDir := filepath.Join(sysClassBlock, filepath.Base(device), "slaves")
    57  	if file, err := os.Open(sysDir); err != nil {
    58  		return device, nil
    59  	} else {
    60  		defer file.Close()
    61  		names, err := file.Readdirnames(-1)
    62  		if err != nil {
    63  			return "", err
    64  		}
    65  		if len(names) != 1 {
    66  			return "", fmt.Errorf("%s has %d entries", device, len(names))
    67  		}
    68  		return filepath.Join("/dev", names[0]), nil
    69  	}
    70  }
    71  
    72  func getFreeSpace(dirname string, freeSpaceTable map[string]uint64) (
    73  	uint64, error) {
    74  	if freeSpace, ok := freeSpaceTable[dirname]; ok {
    75  		return freeSpace, nil
    76  	}
    77  	var statbuf syscall.Statfs_t
    78  	if err := syscall.Statfs(dirname, &statbuf); err != nil {
    79  		return 0, fmt.Errorf("error statfsing: %s: %s", dirname, err)
    80  	}
    81  	// Even though volumes are written as root, treat them as ordinary users so
    82  	// that they don't consume the space reserved for root.
    83  	freeSpace := uint64(statbuf.Bavail * uint64(statbuf.Bsize))
    84  	freeSpaceTable[dirname] = freeSpace
    85  	return freeSpace, nil
    86  }
    87  
    88  func getMemoryVolumeDirectory(logger log.Logger) (string, error) {
    89  	memoryVolumeDirectoryMutex.Lock()
    90  	defer memoryVolumeDirectoryMutex.Unlock()
    91  	if memoryVolumeDirectory != "" {
    92  		return memoryVolumeDirectory, nil
    93  	}
    94  	dirname := "/tmp/hyper-volumes"
    95  	var statbuf wsyscall.Stat_t
    96  	if err := wsyscall.Lstat(dirname, &statbuf); err == nil {
    97  		if statbuf.Mode&wsyscall.S_IFMT != wsyscall.S_IFDIR {
    98  			return "", fmt.Errorf("%s is not a directory", dirname)
    99  		}
   100  		if statbuf.Uid != 0 {
   101  			return "", fmt.Errorf("%s is not owned by root, UID=%d",
   102  				dirname, statbuf.Uid)
   103  		}
   104  	} else if err := os.Mkdir(dirname, fsutil.DirPerms); err != nil {
   105  		return "", err
   106  	}
   107  	mountTable, err := mounts.GetMountTable()
   108  	if err != nil {
   109  		return "", err
   110  	}
   111  	if mountEntry := mountTable.FindEntry(dirname); mountEntry == nil {
   112  		return "", fmt.Errorf("%s: no match in mount table", dirname)
   113  	} else if mountEntry.Type == "tmpfs" {
   114  		memoryVolumeDirectory = dirname
   115  		return memoryVolumeDirectory, nil
   116  	}
   117  	if err := wsyscall.Mount("none", dirname, "tmpfs", 0, ""); err != nil {
   118  		return "", err
   119  	}
   120  	logger.Printf("mounted tmpfs on: %s\n", dirname)
   121  	memoryVolumeDirectory = dirname
   122  	return memoryVolumeDirectory, nil
   123  }
   124  
   125  func getMounts(mountTable *mounts.MountTable) (
   126  	map[string]*mounts.MountEntry, error) {
   127  	mountMap := make(map[string]*mounts.MountEntry)
   128  	for _, entry := range mountTable.Entries {
   129  		if entry.MountPoint == "/boot" {
   130  			continue
   131  		}
   132  		device := entry.Device
   133  		if !strings.HasPrefix(device, "/dev/") {
   134  			continue
   135  		}
   136  		if device == "/dev/root" { // Ignore this dumb shit.
   137  			continue
   138  		}
   139  		if target, err := filepath.EvalSymlinks(device); err != nil {
   140  			return nil, err
   141  		} else {
   142  			device = target
   143  		}
   144  		var err error
   145  		device, err = demapDevice(device)
   146  		if err != nil {
   147  			return nil, err
   148  		}
   149  		device = device[5:]
   150  		if _, ok := mountMap[device]; !ok { // Pick the first mount point.
   151  			mountMap[device] = entry
   152  		}
   153  	}
   154  	return mountMap, nil
   155  }
   156  
   157  // grow2fs will try and grow an ext{2,3,4} file-system to fit the volume size,
   158  // expanding the partition first if appropriate.
   159  func grow2fs(volume string, logger log.DebugLogger) error {
   160  	if check2fs(volume) {
   161  		// Simple case: file-system is on the raw volume, no partition table.
   162  		return resize2fs(volume, 0)
   163  	}
   164  	// Read MBR and check if it's a simple single-partition volume.
   165  	file, err := os.Open(volume)
   166  	if err != nil {
   167  		return err
   168  	}
   169  	partitionTable, err := mbr.Decode(file)
   170  	file.Close()
   171  	if err != nil {
   172  		return err
   173  	}
   174  	if partitionTable == nil {
   175  		return fmt.Errorf("no DOS partition table found")
   176  	}
   177  	if partitionTable.GetPartitionSize(1) > 0 ||
   178  		partitionTable.GetPartitionSize(2) > 0 ||
   179  		partitionTable.GetPartitionSize(3) > 0 {
   180  		return fmt.Errorf("unsupported partition sizes: [%s,%s,%s,%s]",
   181  			format.FormatBytes(partitionTable.GetPartitionSize(0)),
   182  			format.FormatBytes(partitionTable.GetPartitionSize(1)),
   183  			format.FormatBytes(partitionTable.GetPartitionSize(2)),
   184  			format.FormatBytes(partitionTable.GetPartitionSize(3)))
   185  	}
   186  	// Try and extend the partition.
   187  	cmd := exec.Command("parted", "-s", volume, "resizepart", "1", "100%")
   188  	if output, err := cmd.CombinedOutput(); err != nil {
   189  		output = bytes.ReplaceAll(output, carriageReturnLiteral, nil)
   190  		output = bytes.ReplaceAll(output, newlineLiteral, newlineReplacement)
   191  		return fmt.Errorf("error running parted for: %s: %s: %s",
   192  			volume, err, string(output))
   193  	}
   194  	// Try and resize the file-system in the partition (need a loop device).
   195  	device, err := fsutil.LoopbackSetupAndWaitForPartition(volume, "p1",
   196  		time.Minute, logger)
   197  	if err != nil {
   198  		return err
   199  	}
   200  	defer fsutil.LoopbackDeleteAndWaitForPartition(device, "p1", time.Minute,
   201  		logger)
   202  	partition := device + "p1"
   203  	if !check2fs(partition) {
   204  		return nil
   205  	}
   206  	return resize2fs(partition, 0)
   207  }
   208  
   209  // indexToName will return the volume name for the specified volume index (0
   210  // is the "root" volume, 1 is "secondary-volume.0" and so on).
   211  func indexToName(index int) string {
   212  	if index == 0 {
   213  		return "root"
   214  	}
   215  	return fmt.Sprintf("secondary-volume.%d", index-1)
   216  }
   217  
   218  // resize2fs will resize an ext{2,3,4} file-system to fit the specified size.
   219  // If size is zero, it will resize to fit the device size.
   220  func resize2fs(device string, size uint64) error {
   221  	cmd := exec.Command("e2fsck", "-f", "-y", device)
   222  	if output, err := cmd.CombinedOutput(); err != nil {
   223  		output = bytes.ReplaceAll(output, carriageReturnLiteral, nil)
   224  		output = bytes.ReplaceAll(output, newlineLiteral, newlineReplacement)
   225  		return fmt.Errorf("error running e2fsck for: %s: %s: %s",
   226  			device, err, string(output))
   227  	}
   228  	cmd = exec.Command("resize2fs", device)
   229  	if size > 0 {
   230  		if size < 1<<20 {
   231  			return fmt.Errorf("size: %d too small", size)
   232  		}
   233  		cmd.Args = append(cmd.Args, strconv.FormatUint(size>>9, 10)+"s")
   234  	}
   235  	if output, err := cmd.CombinedOutput(); err != nil {
   236  		output = bytes.ReplaceAll(output, carriageReturnLiteral, nil)
   237  		output = bytes.ReplaceAll(output, newlineLiteral, newlineReplacement)
   238  		return fmt.Errorf("error running resize2fs for: %s: %s: %s",
   239  			device, err, string(output))
   240  	}
   241  	return nil
   242  }
   243  
   244  // shrink2fs will try and shrink an ext{2,3,4} file-system on a volume,
   245  // shrinking the partition afterwards if appropriate.
   246  func shrink2fs(volume string, size uint64, logger log.DebugLogger) error {
   247  	if check2fs(volume) {
   248  		// Simple case: file-system is on the raw volume, no partition table.
   249  		return resize2fs(volume, size)
   250  	}
   251  	// Read MBR and check if it's a simple single-partition volume.
   252  	file, err := os.Open(volume)
   253  	if err != nil {
   254  		return err
   255  	}
   256  	partitionTable, err := mbr.Decode(file)
   257  	file.Close()
   258  	if err != nil {
   259  		return err
   260  	}
   261  	if partitionTable == nil {
   262  		return fmt.Errorf("no DOS partition table found")
   263  	}
   264  	if partitionTable.GetPartitionSize(1) > 0 ||
   265  		partitionTable.GetPartitionSize(2) > 0 ||
   266  		partitionTable.GetPartitionSize(3) > 0 {
   267  		return fmt.Errorf("unsupported partition sizes: [%s,%s,%s,%s]",
   268  			format.FormatBytes(partitionTable.GetPartitionSize(0)),
   269  			format.FormatBytes(partitionTable.GetPartitionSize(1)),
   270  			format.FormatBytes(partitionTable.GetPartitionSize(2)),
   271  			format.FormatBytes(partitionTable.GetPartitionSize(3)))
   272  	}
   273  	size -= partitionTable.GetPartitionOffset(0)
   274  	if size >= partitionTable.GetPartitionSize(0) {
   275  		return errors.New("size greater than existing partition")
   276  	}
   277  	if err := partitionTable.SetPartitionSize(0, size); err != nil {
   278  		return err
   279  	}
   280  	// Try and resize the file-system in the partition (need a loop device).
   281  	device, err := fsutil.LoopbackSetupAndWaitForPartition(volume, "p1",
   282  		time.Minute, logger)
   283  	if err != nil {
   284  		return err
   285  	}
   286  	deleteLoopback := true
   287  	defer func() {
   288  		if deleteLoopback {
   289  			fsutil.LoopbackDeleteAndWaitForPartition(device, "p1", time.Minute,
   290  				logger)
   291  		}
   292  	}()
   293  	partition := device + "p1"
   294  	if !check2fs(partition) {
   295  		return errors.New("no ext2 file-system found in partition")
   296  	}
   297  	if err := resize2fs(partition, size); err != nil {
   298  		return err
   299  	}
   300  	deleteLoopback = false
   301  	err = fsutil.LoopbackDeleteAndWaitForPartition(device, "p1", time.Minute,
   302  		logger)
   303  	if err != nil {
   304  		return err
   305  	}
   306  	return partitionTable.Write(volume)
   307  }
   308  
   309  func (m *Manager) checkTrim(filename string) bool {
   310  	return m.volumeInfos[filepath.Dir(filepath.Dir(filename))].CanTrim
   311  }
   312  
   313  func (m *Manager) detectVolumeDirectories(mountTable *mounts.MountTable) error {
   314  	mountMap, err := getMounts(mountTable)
   315  	if err != nil {
   316  		return err
   317  	}
   318  	var mountEntriesToUse []*mounts.MountEntry
   319  	biggestMounts := make(map[string]mountInfo)
   320  	for device, mountEntry := range mountMap {
   321  		sysDir := filepath.Join(sysClassBlock, device)
   322  		linkTarget, err := os.Readlink(sysDir)
   323  		if err != nil {
   324  			if os.IsNotExist(err) {
   325  				continue
   326  			}
   327  			return err
   328  		}
   329  		_, err = os.Stat(filepath.Join(sysDir, "partition"))
   330  		if err != nil {
   331  			if os.IsNotExist(err) { // Not a partition: easy!
   332  				mountEntriesToUse = append(mountEntriesToUse, mountEntry)
   333  				continue
   334  			}
   335  			return err
   336  		}
   337  		var statbuf syscall.Statfs_t
   338  		if err := syscall.Statfs(mountEntry.MountPoint, &statbuf); err != nil {
   339  			return fmt.Errorf("error statfsing: %s: %s",
   340  				mountEntry.MountPoint, err)
   341  		}
   342  		size := uint64(statbuf.Blocks * uint64(statbuf.Bsize))
   343  		parentDevice := filepath.Base(filepath.Dir(linkTarget))
   344  		if biggestMount, ok := biggestMounts[parentDevice]; !ok {
   345  			biggestMounts[parentDevice] = mountInfo{mountEntry, size}
   346  		} else if size > biggestMount.size {
   347  			biggestMounts[parentDevice] = mountInfo{mountEntry, size}
   348  		}
   349  	}
   350  	for _, biggestMount := range biggestMounts {
   351  		mountEntriesToUse = append(mountEntriesToUse, biggestMount.mountEntry)
   352  	}
   353  	for _, entry := range mountEntriesToUse {
   354  		volumeDirectory := filepath.Join(entry.MountPoint, "hyper-volumes")
   355  		m.volumeDirectories = append(m.volumeDirectories, volumeDirectory)
   356  		m.volumeInfos[volumeDirectory] = VolumeInfo{
   357  			CanTrim:    checkTrim(entry),
   358  			MountPoint: entry.MountPoint,
   359  		}
   360  	}
   361  	sort.Strings(m.volumeDirectories)
   362  	return nil
   363  }
   364  
   365  func (m *Manager) findFreeSpace(size uint64, freeSpaceTable map[string]uint64,
   366  	position *int) (string, error) {
   367  	if *position >= len(m.volumeDirectories) {
   368  		*position = 0
   369  	}
   370  	startingPosition := *position
   371  	for {
   372  		freeSpace, err := getFreeSpace(m.volumeDirectories[*position],
   373  			freeSpaceTable)
   374  		if err != nil {
   375  			return "", err
   376  		}
   377  		// Remove space reserved for the object cache but not yet used.
   378  		if *position == 0 && m.objectCache != nil {
   379  			stats := m.objectCache.GetStats()
   380  			if m.ObjectCacheBytes > stats.CachedBytes {
   381  				unused := m.ObjectCacheBytes - stats.CachedBytes
   382  				unused += unused >> 2 // In practice block usage is +30%.
   383  				if unused < freeSpace {
   384  					freeSpace -= unused
   385  				} else {
   386  					freeSpace = 0
   387  				}
   388  			}
   389  		}
   390  		// Keep an extra 1 GiB free space for the root file-system. Be nice.
   391  		if m.volumeInfos[m.volumeDirectories[*position]].MountPoint == "/" {
   392  			if freeSpace > 1<<30 {
   393  				freeSpace -= 1 << 30
   394  			} else {
   395  				freeSpace = 0
   396  			}
   397  		}
   398  		if size < freeSpace {
   399  			dirname := m.volumeDirectories[*position]
   400  			freeSpaceTable[dirname] -= size
   401  			return dirname, nil
   402  		}
   403  		*position++
   404  		if *position >= len(m.volumeDirectories) {
   405  			*position = 0
   406  		}
   407  		if *position == startingPosition {
   408  			return "", fmt.Errorf("not enough free space for %s volume",
   409  				format.FormatBytes(size))
   410  		}
   411  	}
   412  }
   413  
   414  func (m *Manager) getVolumeDirectories(rootSize uint64,
   415  	rootVolumeType proto.VolumeType, secondaryVolumes []proto.Volume,
   416  	spreadVolumes bool) ([]string, error) {
   417  	sizes := make([]uint64, 0, len(secondaryVolumes)+1)
   418  	if rootSize > 0 {
   419  		sizes = append(sizes, rootSize)
   420  	}
   421  	for _, volume := range secondaryVolumes {
   422  		if volume.Size > 0 {
   423  			sizes = append(sizes, volume.Size)
   424  		} else {
   425  			return nil, errors.New("secondary volumes cannot be zero sized")
   426  		}
   427  	}
   428  	freeSpaceTable := make(map[string]uint64, len(m.volumeDirectories))
   429  	directoriesToUse := make([]string, 0, len(sizes))
   430  	position := 0
   431  	for len(sizes) > 0 {
   432  		dirname, err := m.findFreeSpace(sizes[0], freeSpaceTable, &position)
   433  		if err != nil {
   434  			return nil, err
   435  		}
   436  		directoriesToUse = append(directoriesToUse, dirname)
   437  		sizes = sizes[1:]
   438  		if spreadVolumes {
   439  			position++
   440  		}
   441  	}
   442  	for index := range directoriesToUse {
   443  		if (index == 0 && rootVolumeType == proto.VolumeTypeMemory) ||
   444  			(index > 0 && index <= len(secondaryVolumes) &&
   445  				secondaryVolumes[index-1].Type == proto.VolumeTypeMemory) {
   446  			if dirname, err := getMemoryVolumeDirectory(m.Logger); err != nil {
   447  				return nil, err
   448  			} else {
   449  				directoriesToUse[index] = dirname
   450  			}
   451  		}
   452  	}
   453  	return directoriesToUse, nil
   454  }
   455  
   456  func (m *Manager) setupVolumes(startOptions StartOptions) error {
   457  	mountTable, err := mounts.GetMountTable()
   458  	if err != nil {
   459  		return err
   460  	}
   461  	m.volumeInfos = make(map[string]VolumeInfo)
   462  	if len(startOptions.VolumeDirectories) < 1 {
   463  		if err := m.detectVolumeDirectories(mountTable); err != nil {
   464  			return err
   465  		}
   466  	} else {
   467  		m.volumeDirectories = startOptions.VolumeDirectories
   468  		for _, dirname := range m.volumeDirectories {
   469  			if entry := mountTable.FindEntry(dirname); entry != nil {
   470  				m.volumeInfos[dirname] = VolumeInfo{
   471  					CanTrim:    checkTrim(entry),
   472  					MountPoint: entry.MountPoint,
   473  				}
   474  			}
   475  		}
   476  	}
   477  	if len(m.volumeDirectories) < 1 {
   478  		return errors.New("no volume directories available")
   479  	}
   480  	for _, volumeDirectory := range m.volumeDirectories {
   481  		if err := os.MkdirAll(volumeDirectory, fsutil.DirPerms); err != nil {
   482  			return err
   483  		}
   484  		var statbuf syscall.Statfs_t
   485  		if err := syscall.Statfs(volumeDirectory, &statbuf); err != nil {
   486  			return fmt.Errorf("error statfsing: %s: %s", volumeDirectory, err)
   487  		}
   488  		m.totalVolumeBytes += uint64(statbuf.Blocks * uint64(statbuf.Bsize))
   489  	}
   490  	return nil
   491  }