github.com/Cloud-Foundations/Dominator@v0.3.4/hypervisor/manager/volumes.go (about) 1 package manager 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "os" 8 "os/exec" 9 "path/filepath" 10 "sort" 11 "strconv" 12 "strings" 13 "sync" 14 "syscall" 15 "time" 16 17 "github.com/Cloud-Foundations/Dominator/lib/format" 18 "github.com/Cloud-Foundations/Dominator/lib/fsutil" 19 "github.com/Cloud-Foundations/Dominator/lib/fsutil/mounts" 20 "github.com/Cloud-Foundations/Dominator/lib/log" 21 "github.com/Cloud-Foundations/Dominator/lib/mbr" 22 "github.com/Cloud-Foundations/Dominator/lib/wsyscall" 23 proto "github.com/Cloud-Foundations/Dominator/proto/hypervisor" 24 ) 25 26 const ( 27 sysClassBlock = "/sys/class/block" 28 ) 29 30 var ( 31 memoryVolumeDirectory string 32 memoryVolumeDirectoryMutex sync.Mutex 33 ) 34 35 type mountInfo struct { 36 mountEntry *mounts.MountEntry 37 size uint64 38 } 39 40 // check2fs returns true if the device hosts an ext{2,3,4} file-system. 41 func check2fs(device string) bool { 42 cmd := exec.Command("e2label", device) 43 return cmd.Run() == nil 44 } 45 46 func checkTrim(mountEntry *mounts.MountEntry) bool { 47 for _, option := range strings.Split(mountEntry.Options, ",") { 48 if option == "discard" { 49 return true 50 } 51 } 52 return false 53 } 54 55 func demapDevice(device string) (string, error) { 56 sysDir := filepath.Join(sysClassBlock, filepath.Base(device), "slaves") 57 if file, err := os.Open(sysDir); err != nil { 58 return device, nil 59 } else { 60 defer file.Close() 61 names, err := file.Readdirnames(-1) 62 if err != nil { 63 return "", err 64 } 65 if len(names) != 1 { 66 return "", fmt.Errorf("%s has %d entries", device, len(names)) 67 } 68 return filepath.Join("/dev", names[0]), nil 69 } 70 } 71 72 func getFreeSpace(dirname string, freeSpaceTable map[string]uint64) ( 73 uint64, error) { 74 if freeSpace, ok := freeSpaceTable[dirname]; ok { 75 return freeSpace, nil 76 } 77 var statbuf syscall.Statfs_t 78 if err := syscall.Statfs(dirname, &statbuf); err != nil { 79 return 0, fmt.Errorf("error statfsing: %s: %s", dirname, err) 80 } 81 // Even though volumes are written as root, treat them as ordinary users so 82 // that they don't consume the space reserved for root. 83 freeSpace := uint64(statbuf.Bavail * uint64(statbuf.Bsize)) 84 freeSpaceTable[dirname] = freeSpace 85 return freeSpace, nil 86 } 87 88 func getMemoryVolumeDirectory(logger log.Logger) (string, error) { 89 memoryVolumeDirectoryMutex.Lock() 90 defer memoryVolumeDirectoryMutex.Unlock() 91 if memoryVolumeDirectory != "" { 92 return memoryVolumeDirectory, nil 93 } 94 dirname := "/tmp/hyper-volumes" 95 var statbuf wsyscall.Stat_t 96 if err := wsyscall.Lstat(dirname, &statbuf); err == nil { 97 if statbuf.Mode&wsyscall.S_IFMT != wsyscall.S_IFDIR { 98 return "", fmt.Errorf("%s is not a directory", dirname) 99 } 100 if statbuf.Uid != 0 { 101 return "", fmt.Errorf("%s is not owned by root, UID=%d", 102 dirname, statbuf.Uid) 103 } 104 } else if err := os.Mkdir(dirname, fsutil.DirPerms); err != nil { 105 return "", err 106 } 107 mountTable, err := mounts.GetMountTable() 108 if err != nil { 109 return "", err 110 } 111 if mountEntry := mountTable.FindEntry(dirname); mountEntry == nil { 112 return "", fmt.Errorf("%s: no match in mount table", dirname) 113 } else if mountEntry.Type == "tmpfs" { 114 memoryVolumeDirectory = dirname 115 return memoryVolumeDirectory, nil 116 } 117 if err := wsyscall.Mount("none", dirname, "tmpfs", 0, ""); err != nil { 118 return "", err 119 } 120 logger.Printf("mounted tmpfs on: %s\n", dirname) 121 memoryVolumeDirectory = dirname 122 return memoryVolumeDirectory, nil 123 } 124 125 func getMounts(mountTable *mounts.MountTable) ( 126 map[string]*mounts.MountEntry, error) { 127 mountMap := make(map[string]*mounts.MountEntry) 128 for _, entry := range mountTable.Entries { 129 if entry.MountPoint == "/boot" { 130 continue 131 } 132 device := entry.Device 133 if !strings.HasPrefix(device, "/dev/") { 134 continue 135 } 136 if device == "/dev/root" { // Ignore this dumb shit. 137 continue 138 } 139 if target, err := filepath.EvalSymlinks(device); err != nil { 140 return nil, err 141 } else { 142 device = target 143 } 144 var err error 145 device, err = demapDevice(device) 146 if err != nil { 147 return nil, err 148 } 149 device = device[5:] 150 if _, ok := mountMap[device]; !ok { // Pick the first mount point. 151 mountMap[device] = entry 152 } 153 } 154 return mountMap, nil 155 } 156 157 // grow2fs will try and grow an ext{2,3,4} file-system to fit the volume size, 158 // expanding the partition first if appropriate. 159 func grow2fs(volume string, logger log.DebugLogger) error { 160 if check2fs(volume) { 161 // Simple case: file-system is on the raw volume, no partition table. 162 return resize2fs(volume, 0) 163 } 164 // Read MBR and check if it's a simple single-partition volume. 165 file, err := os.Open(volume) 166 if err != nil { 167 return err 168 } 169 partitionTable, err := mbr.Decode(file) 170 file.Close() 171 if err != nil { 172 return err 173 } 174 if partitionTable == nil { 175 return fmt.Errorf("no DOS partition table found") 176 } 177 if partitionTable.GetPartitionSize(1) > 0 || 178 partitionTable.GetPartitionSize(2) > 0 || 179 partitionTable.GetPartitionSize(3) > 0 { 180 return fmt.Errorf("unsupported partition sizes: [%s,%s,%s,%s]", 181 format.FormatBytes(partitionTable.GetPartitionSize(0)), 182 format.FormatBytes(partitionTable.GetPartitionSize(1)), 183 format.FormatBytes(partitionTable.GetPartitionSize(2)), 184 format.FormatBytes(partitionTable.GetPartitionSize(3))) 185 } 186 // Try and extend the partition. 187 cmd := exec.Command("parted", "-s", volume, "resizepart", "1", "100%") 188 if output, err := cmd.CombinedOutput(); err != nil { 189 output = bytes.ReplaceAll(output, carriageReturnLiteral, nil) 190 output = bytes.ReplaceAll(output, newlineLiteral, newlineReplacement) 191 return fmt.Errorf("error running parted for: %s: %s: %s", 192 volume, err, string(output)) 193 } 194 // Try and resize the file-system in the partition (need a loop device). 195 device, err := fsutil.LoopbackSetupAndWaitForPartition(volume, "p1", 196 time.Minute, logger) 197 if err != nil { 198 return err 199 } 200 defer fsutil.LoopbackDeleteAndWaitForPartition(device, "p1", time.Minute, 201 logger) 202 partition := device + "p1" 203 if !check2fs(partition) { 204 return nil 205 } 206 return resize2fs(partition, 0) 207 } 208 209 // indexToName will return the volume name for the specified volume index (0 210 // is the "root" volume, 1 is "secondary-volume.0" and so on). 211 func indexToName(index int) string { 212 if index == 0 { 213 return "root" 214 } 215 return fmt.Sprintf("secondary-volume.%d", index-1) 216 } 217 218 // resize2fs will resize an ext{2,3,4} file-system to fit the specified size. 219 // If size is zero, it will resize to fit the device size. 220 func resize2fs(device string, size uint64) error { 221 cmd := exec.Command("e2fsck", "-f", "-y", device) 222 if output, err := cmd.CombinedOutput(); err != nil { 223 output = bytes.ReplaceAll(output, carriageReturnLiteral, nil) 224 output = bytes.ReplaceAll(output, newlineLiteral, newlineReplacement) 225 return fmt.Errorf("error running e2fsck for: %s: %s: %s", 226 device, err, string(output)) 227 } 228 cmd = exec.Command("resize2fs", device) 229 if size > 0 { 230 if size < 1<<20 { 231 return fmt.Errorf("size: %d too small", size) 232 } 233 cmd.Args = append(cmd.Args, strconv.FormatUint(size>>9, 10)+"s") 234 } 235 if output, err := cmd.CombinedOutput(); err != nil { 236 output = bytes.ReplaceAll(output, carriageReturnLiteral, nil) 237 output = bytes.ReplaceAll(output, newlineLiteral, newlineReplacement) 238 return fmt.Errorf("error running resize2fs for: %s: %s: %s", 239 device, err, string(output)) 240 } 241 return nil 242 } 243 244 // shrink2fs will try and shrink an ext{2,3,4} file-system on a volume, 245 // shrinking the partition afterwards if appropriate. 246 func shrink2fs(volume string, size uint64, logger log.DebugLogger) error { 247 if check2fs(volume) { 248 // Simple case: file-system is on the raw volume, no partition table. 249 return resize2fs(volume, size) 250 } 251 // Read MBR and check if it's a simple single-partition volume. 252 file, err := os.Open(volume) 253 if err != nil { 254 return err 255 } 256 partitionTable, err := mbr.Decode(file) 257 file.Close() 258 if err != nil { 259 return err 260 } 261 if partitionTable == nil { 262 return fmt.Errorf("no DOS partition table found") 263 } 264 if partitionTable.GetPartitionSize(1) > 0 || 265 partitionTable.GetPartitionSize(2) > 0 || 266 partitionTable.GetPartitionSize(3) > 0 { 267 return fmt.Errorf("unsupported partition sizes: [%s,%s,%s,%s]", 268 format.FormatBytes(partitionTable.GetPartitionSize(0)), 269 format.FormatBytes(partitionTable.GetPartitionSize(1)), 270 format.FormatBytes(partitionTable.GetPartitionSize(2)), 271 format.FormatBytes(partitionTable.GetPartitionSize(3))) 272 } 273 size -= partitionTable.GetPartitionOffset(0) 274 if size >= partitionTable.GetPartitionSize(0) { 275 return errors.New("size greater than existing partition") 276 } 277 if err := partitionTable.SetPartitionSize(0, size); err != nil { 278 return err 279 } 280 // Try and resize the file-system in the partition (need a loop device). 281 device, err := fsutil.LoopbackSetupAndWaitForPartition(volume, "p1", 282 time.Minute, logger) 283 if err != nil { 284 return err 285 } 286 deleteLoopback := true 287 defer func() { 288 if deleteLoopback { 289 fsutil.LoopbackDeleteAndWaitForPartition(device, "p1", time.Minute, 290 logger) 291 } 292 }() 293 partition := device + "p1" 294 if !check2fs(partition) { 295 return errors.New("no ext2 file-system found in partition") 296 } 297 if err := resize2fs(partition, size); err != nil { 298 return err 299 } 300 deleteLoopback = false 301 err = fsutil.LoopbackDeleteAndWaitForPartition(device, "p1", time.Minute, 302 logger) 303 if err != nil { 304 return err 305 } 306 return partitionTable.Write(volume) 307 } 308 309 func (m *Manager) checkTrim(filename string) bool { 310 return m.volumeInfos[filepath.Dir(filepath.Dir(filename))].CanTrim 311 } 312 313 func (m *Manager) detectVolumeDirectories(mountTable *mounts.MountTable) error { 314 mountMap, err := getMounts(mountTable) 315 if err != nil { 316 return err 317 } 318 var mountEntriesToUse []*mounts.MountEntry 319 biggestMounts := make(map[string]mountInfo) 320 for device, mountEntry := range mountMap { 321 sysDir := filepath.Join(sysClassBlock, device) 322 linkTarget, err := os.Readlink(sysDir) 323 if err != nil { 324 if os.IsNotExist(err) { 325 continue 326 } 327 return err 328 } 329 _, err = os.Stat(filepath.Join(sysDir, "partition")) 330 if err != nil { 331 if os.IsNotExist(err) { // Not a partition: easy! 332 mountEntriesToUse = append(mountEntriesToUse, mountEntry) 333 continue 334 } 335 return err 336 } 337 var statbuf syscall.Statfs_t 338 if err := syscall.Statfs(mountEntry.MountPoint, &statbuf); err != nil { 339 return fmt.Errorf("error statfsing: %s: %s", 340 mountEntry.MountPoint, err) 341 } 342 size := uint64(statbuf.Blocks * uint64(statbuf.Bsize)) 343 parentDevice := filepath.Base(filepath.Dir(linkTarget)) 344 if biggestMount, ok := biggestMounts[parentDevice]; !ok { 345 biggestMounts[parentDevice] = mountInfo{mountEntry, size} 346 } else if size > biggestMount.size { 347 biggestMounts[parentDevice] = mountInfo{mountEntry, size} 348 } 349 } 350 for _, biggestMount := range biggestMounts { 351 mountEntriesToUse = append(mountEntriesToUse, biggestMount.mountEntry) 352 } 353 for _, entry := range mountEntriesToUse { 354 volumeDirectory := filepath.Join(entry.MountPoint, "hyper-volumes") 355 m.volumeDirectories = append(m.volumeDirectories, volumeDirectory) 356 m.volumeInfos[volumeDirectory] = VolumeInfo{ 357 CanTrim: checkTrim(entry), 358 MountPoint: entry.MountPoint, 359 } 360 } 361 sort.Strings(m.volumeDirectories) 362 return nil 363 } 364 365 func (m *Manager) findFreeSpace(size uint64, freeSpaceTable map[string]uint64, 366 position *int) (string, error) { 367 if *position >= len(m.volumeDirectories) { 368 *position = 0 369 } 370 startingPosition := *position 371 for { 372 freeSpace, err := getFreeSpace(m.volumeDirectories[*position], 373 freeSpaceTable) 374 if err != nil { 375 return "", err 376 } 377 // Remove space reserved for the object cache but not yet used. 378 if *position == 0 && m.objectCache != nil { 379 stats := m.objectCache.GetStats() 380 if m.ObjectCacheBytes > stats.CachedBytes { 381 unused := m.ObjectCacheBytes - stats.CachedBytes 382 unused += unused >> 2 // In practice block usage is +30%. 383 if unused < freeSpace { 384 freeSpace -= unused 385 } else { 386 freeSpace = 0 387 } 388 } 389 } 390 // Keep an extra 1 GiB free space for the root file-system. Be nice. 391 if m.volumeInfos[m.volumeDirectories[*position]].MountPoint == "/" { 392 if freeSpace > 1<<30 { 393 freeSpace -= 1 << 30 394 } else { 395 freeSpace = 0 396 } 397 } 398 if size < freeSpace { 399 dirname := m.volumeDirectories[*position] 400 freeSpaceTable[dirname] -= size 401 return dirname, nil 402 } 403 *position++ 404 if *position >= len(m.volumeDirectories) { 405 *position = 0 406 } 407 if *position == startingPosition { 408 return "", fmt.Errorf("not enough free space for %s volume", 409 format.FormatBytes(size)) 410 } 411 } 412 } 413 414 func (m *Manager) getVolumeDirectories(rootSize uint64, 415 rootVolumeType proto.VolumeType, secondaryVolumes []proto.Volume, 416 spreadVolumes bool) ([]string, error) { 417 sizes := make([]uint64, 0, len(secondaryVolumes)+1) 418 if rootSize > 0 { 419 sizes = append(sizes, rootSize) 420 } 421 for _, volume := range secondaryVolumes { 422 if volume.Size > 0 { 423 sizes = append(sizes, volume.Size) 424 } else { 425 return nil, errors.New("secondary volumes cannot be zero sized") 426 } 427 } 428 freeSpaceTable := make(map[string]uint64, len(m.volumeDirectories)) 429 directoriesToUse := make([]string, 0, len(sizes)) 430 position := 0 431 for len(sizes) > 0 { 432 dirname, err := m.findFreeSpace(sizes[0], freeSpaceTable, &position) 433 if err != nil { 434 return nil, err 435 } 436 directoriesToUse = append(directoriesToUse, dirname) 437 sizes = sizes[1:] 438 if spreadVolumes { 439 position++ 440 } 441 } 442 for index := range directoriesToUse { 443 if (index == 0 && rootVolumeType == proto.VolumeTypeMemory) || 444 (index > 0 && index <= len(secondaryVolumes) && 445 secondaryVolumes[index-1].Type == proto.VolumeTypeMemory) { 446 if dirname, err := getMemoryVolumeDirectory(m.Logger); err != nil { 447 return nil, err 448 } else { 449 directoriesToUse[index] = dirname 450 } 451 } 452 } 453 return directoriesToUse, nil 454 } 455 456 func (m *Manager) setupVolumes(startOptions StartOptions) error { 457 mountTable, err := mounts.GetMountTable() 458 if err != nil { 459 return err 460 } 461 m.volumeInfos = make(map[string]VolumeInfo) 462 if len(startOptions.VolumeDirectories) < 1 { 463 if err := m.detectVolumeDirectories(mountTable); err != nil { 464 return err 465 } 466 } else { 467 m.volumeDirectories = startOptions.VolumeDirectories 468 for _, dirname := range m.volumeDirectories { 469 if entry := mountTable.FindEntry(dirname); entry != nil { 470 m.volumeInfos[dirname] = VolumeInfo{ 471 CanTrim: checkTrim(entry), 472 MountPoint: entry.MountPoint, 473 } 474 } 475 } 476 } 477 if len(m.volumeDirectories) < 1 { 478 return errors.New("no volume directories available") 479 } 480 for _, volumeDirectory := range m.volumeDirectories { 481 if err := os.MkdirAll(volumeDirectory, fsutil.DirPerms); err != nil { 482 return err 483 } 484 var statbuf syscall.Statfs_t 485 if err := syscall.Statfs(volumeDirectory, &statbuf); err != nil { 486 return fmt.Errorf("error statfsing: %s: %s", volumeDirectory, err) 487 } 488 m.totalVolumeBytes += uint64(statbuf.Blocks * uint64(statbuf.Bsize)) 489 } 490 return nil 491 }