github.com/vmware/govmomi@v0.51.0/simulator/container_host_system.go (about)

     1  // © Broadcom. All Rights Reserved.
     2  // The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package simulator
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  
    11  	"github.com/vmware/govmomi/units"
    12  	"github.com/vmware/govmomi/vim25/methods"
    13  	"github.com/vmware/govmomi/vim25/types"
    14  )
    15  
const (
	// advOptPrefixPnicToUnderlayPrefix is the key prefix of the per-pNIC advanced options. The pNIC
	// device name is appended to form the full key, and the option value is used as the name of the
	// bridge network (underlay) that pNIC attaches to - see getPnicUnderlay.
	advOptPrefixPnicToUnderlayPrefix = "RUN.underlay."
	// advOptContainerBackingImage is the advanced option key "RUN.container". createSimulationHost
	// queries that key and only creates a backing container for the host when the query succeeds.
	advOptContainerBackingImage      = "RUN.container"
	// defaultUnderlayBridgeName is the bridge network name intended as the default underlay.
	// NOTE(review): presumably this is the value seeded into the RUN.underlay.<pnic> options -
	// confirm against the code that populates the host's advanced options.
	defaultUnderlayBridgeName        = "vcsim-underlay"
)
    21  
// simHost binds a simulated HostSystem to the container that backs it.
// A nil *simHost stands for a host without a container backing; remove checks for a nil
// receiver so callers can invoke it without testing for that case first.
type simHost struct {
	// host is the host system this binding was created for.
	host *HostSystem
	// c is the backing container, assigned by createSimulationHost.
	c    *container
}
    26  
    27  // createSimHostMounts iterates over the provide filesystem mount info, creating docker volumes. It does _not_ delete volumes
    28  // already created if creation of one fails.
    29  // Returns:
    30  // volume mounts: mount options suitable to pass directly to docker
    31  // exec commands: a set of commands to run in the sim host after creation
    32  // error: if construction of the above outputs fails
    33  func createSimHostMounts(ctx *Context, containerName string, mounts []types.HostFileSystemMountInfo) ([]string, [][]string, error) {
    34  	var dockerVol []string
    35  	var symlinkCmds [][]string
    36  
    37  	for i := range mounts {
    38  		info := &mounts[i]
    39  		name := info.Volume.GetHostFileSystemVolume().Name
    40  
    41  		// NOTE: if we ever need persistence cross-invocation we can look at encoding the disk info as a label
    42  		labels := []string{"name=" + name, "container=" + containerName, deleteWithContainer}
    43  		dockerUuid, err := createVolume("", labels, nil)
    44  		if err != nil {
    45  			return nil, nil, err
    46  		}
    47  
    48  		uuid := volumeIDtoHostVolumeUUID(dockerUuid)
    49  		name = strings.Replace(name, uuidToken, uuid, -1)
    50  
    51  		switch vol := info.Volume.(type) {
    52  		case *types.HostVmfsVolume:
    53  			vol.BlockSizeMb = 1
    54  			vol.BlockSize = units.KB
    55  			vol.UnmapGranularity = units.KB
    56  			vol.UnmapPriority = "low"
    57  			vol.MajorVersion = 6
    58  			vol.Version = "6.82"
    59  			vol.Uuid = uuid
    60  			vol.HostFileSystemVolume.Name = name
    61  			for e := range vol.Extent {
    62  				vol.Extent[e].DiskName = "____simulated_volume_____"
    63  				if vol.Extent[e].Partition == 0 {
    64  					// HACK: this should be unique within the diskname, but for now this will suffice
    65  					//  partitions start at 1
    66  					vol.Extent[e].Partition = int32(e + 1)
    67  				}
    68  			}
    69  			vol.Ssd = types.NewBool(true)
    70  			vol.Local = types.NewBool(true)
    71  		case *types.HostVfatVolume:
    72  			vol.HostFileSystemVolume.Name = name
    73  		}
    74  
    75  		info.VStorageSupport = "vStorageUnsupported"
    76  
    77  		info.MountInfo.Path = "/vmfs/volumes/" + uuid
    78  		info.MountInfo.Mounted = types.NewBool(true)
    79  		info.MountInfo.Accessible = types.NewBool(true)
    80  		if info.MountInfo.AccessMode == "" {
    81  			info.MountInfo.AccessMode = "readWrite"
    82  		}
    83  
    84  		opt := "rw"
    85  		if info.MountInfo.AccessMode == "readOnly" {
    86  			opt = "ro"
    87  		}
    88  
    89  		dockerVol = append(dockerVol, fmt.Sprintf("%s:/vmfs/volumes/%s:%s", dockerUuid, uuid, opt))
    90  
    91  		// create symlinks from /vmfs/volumes/ for the Volume Name - the direct mount (path) is only the uuid
    92  		// ? can we do this via a script in the ESX image instead of via exec?
    93  		// ? are the volume names exposed in any manner inside the host? They must be because these mounts exist but where does that come from? Chicken and egg problem? ConfigStore?
    94  		symlinkCmds = append(symlinkCmds, []string{"ln", "-s", fmt.Sprintf("/vmfs/volumes/%s", uuid), fmt.Sprintf("/vmfs/volumes/%s", name)})
    95  		if strings.HasPrefix(name, "OSDATA") {
    96  			symlinkCmds = append(symlinkCmds, []string{"mkdir", "-p", "/var/lib/vmware"})
    97  			symlinkCmds = append(symlinkCmds, []string{"ln", "-s", fmt.Sprintf("/vmfs/volumes/%s", uuid), "/var/lib/vmware/osdata"})
    98  		}
    99  	}
   100  
   101  	return dockerVol, symlinkCmds, nil
   102  }
   103  
   104  // createSimHostNetworks creates the networks for the host if not already created. Because we expect multiple hosts on the same network to act as a cluster
   105  // it's likely that only the first host will create networks.
   106  // This includes:
   107  // * bridge network per-pNIC
   108  // * bridge network per-DVS
   109  //
   110  // Returns:
   111  // * array of networks to attach to
   112  // * array of commands to run
   113  // * error
   114  //
   115  // TODO: implement bridge network per DVS - not needed until container backed VMs are "created" on container backed "hosts"
   116  func createSimHostNetworks(ctx *Context, containerName string, networkInfo *types.HostNetworkInfo, advOpts *OptionManager) ([]string, [][]string, error) {
   117  	var dockerNet []string
   118  	var cmds [][]string
   119  
   120  	existingNets := make(map[string]string)
   121  
   122  	// a pnic does not have an IP so this is purely a connectivity statement, not a network identity, however this is not how docker works
   123  	// so we're going to end up with a veth (our pnic) that does have an IP assigned. That IP will end up being used in a NetConfig structure associated
   124  	// with the pNIC. See HostSystem.getNetConfigInterface.
   125  	for i := range networkInfo.Pnic {
   126  		pnicName := networkInfo.Pnic[i].Device
   127  
   128  		bridge := getPnicUnderlay(advOpts, pnicName)
   129  
   130  		if pnic, attached := existingNets[bridge]; attached {
   131  			return nil, nil, fmt.Errorf("cannot attach multiple pNICs to the same underlay: %s and %s both attempting to connect to %s for %s", pnic, pnicName, bridge, containerName)
   132  		}
   133  
   134  		_, err := createBridge(bridge)
   135  		if err != nil {
   136  			return nil, nil, err
   137  		}
   138  
   139  		dockerNet = append(dockerNet, bridge)
   140  		existingNets[bridge] = pnicName
   141  	}
   142  
   143  	return dockerNet, cmds, nil
   144  }
   145  
   146  func getPnicUnderlay(advOpts *OptionManager, pnicName string) string {
   147  	queryRes := advOpts.QueryOptions(&types.QueryOptions{Name: advOptPrefixPnicToUnderlayPrefix + pnicName}).(*methods.QueryOptionsBody).Res
   148  	return queryRes.Returnval[0].GetOptionValue().Value.(string)
   149  }
   150  
   151  // createSimulationHostcreates a simHost binding if the host.ConfigManager.AdvancedOption set contains a key "RUN.container".
   152  // If the set does not contain that key, this returns nil.
   153  // Methods on the simHost type are written to check for nil object so the return from this call can be blindly
   154  // assigned and invoked without the caller caring about whether a binding for a backing container was warranted.
   155  //
   156  // The created simhost is based off of the details of the supplied host system.
   157  // VMFS locations are created based on FileSystemMountInfo
   158  // Bridge networks are created to simulate underlay networks - one per pNIC. You cannot connect two pNICs to the same underlay.
   159  //
   160  // On Network connectivity - initially this is using docker network constructs. This means we cannot easily use nested "ip netns" so we cannot
   161  // have a perfect representation of the ESX structure: pnic(veth)->vswtich(bridge)->{vmk,vnic}(veth)
   162  // Instead we have the following:
   163  // * bridge network per underlay - everything connects directly to the underlay
   164  // * VMs/CRXs connect to the underlay dictated by the Uplink pNIC attached to their vSwitch
   165  // * hostd vmknic gets the "host" container IP - we don't currently support multiple vmknics with different IPs
   166  // * no support for mocking VLANs
   167  func createSimulationHost(ctx *Context, host *HostSystem) (*simHost, error) {
   168  	sh := &simHost{
   169  		host: host,
   170  	}
   171  
   172  	advOpts := ctx.Map.Get(host.ConfigManager.AdvancedOption.Reference()).(*OptionManager)
   173  	fault := advOpts.QueryOptions(&types.QueryOptions{Name: "RUN.container"}).(*methods.QueryOptionsBody).Fault()
   174  	if fault != nil {
   175  		if _, ok := fault.VimFault().(*types.InvalidName); ok {
   176  			return nil, nil
   177  		}
   178  		return nil, fmt.Errorf("errror retrieving container backing from host config manager: %+v", fault.VimFault())
   179  	}
   180  
   181  	// assemble env
   182  	var dockerEnv []string
   183  
   184  	var execCmds [][]string
   185  
   186  	var err error
   187  
   188  	hName := host.Summary.Config.Name
   189  	hUuid := host.Summary.Hardware.Uuid
   190  	containerName := constructContainerName(hName, hUuid)
   191  
   192  	// create volumes and mounts
   193  	dockerVol, volCmds, err := createSimHostMounts(ctx, containerName, host.Config.FileSystemVolume.MountInfo)
   194  	if err != nil {
   195  		return nil, err
   196  	}
   197  	execCmds = append(execCmds, volCmds...)
   198  
   199  	// create networks
   200  	dockerNet, netCmds, err := createSimHostNetworks(ctx, containerName, host.Config.Network, advOpts)
   201  	if err != nil {
   202  		return nil, err
   203  	}
   204  	execCmds = append(execCmds, netCmds...)
   205  
   206  	// create the container
   207  	sh.c, err = create(ctx, hName, hUuid, dockerNet, dockerVol, nil, dockerEnv, "alpine:3.20.3", []string{"sleep", "infinity"})
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  
   212  	// start the container
   213  	err = sh.c.start(ctx)
   214  	if err != nil {
   215  		return nil, err
   216  	}
   217  
   218  	// run post-creation steps
   219  	for _, cmd := range execCmds {
   220  		_, err := sh.c.exec(ctx, cmd)
   221  		if err != nil {
   222  			return nil, err
   223  		}
   224  	}
   225  
   226  	_, detail, err := sh.c.inspect()
   227  	if err != nil {
   228  		return nil, err
   229  	}
   230  	for i := range host.Config.Network.Pnic {
   231  		pnic := &host.Config.Network.Pnic[i]
   232  		bridge := getPnicUnderlay(advOpts, pnic.Device)
   233  		settings := detail.NetworkSettings.Networks[bridge]
   234  
   235  		// it doesn't really make sense at an ESX level to set this information as IP bindings are associated with
   236  		// vnics (VMs) or vmknics (daemons such as hostd).
   237  		// However it's a useful location to stash this info in a manner that can be retrieved at a later date.
   238  		pnic.Spec.Ip.IpAddress = settings.IPAddress
   239  		pnic.Spec.Ip.SubnetMask = prefixToMask(settings.IPPrefixLen)
   240  
   241  		pnic.Mac = settings.MacAddress
   242  	}
   243  
   244  	// update the active "management" nicType with the container IP for vmnic0
   245  	netconfig, err := host.getNetConfigInterface(ctx, "management")
   246  	if err != nil {
   247  		return nil, err
   248  	}
   249  	netconfig.vmk.Spec.Ip.IpAddress = netconfig.uplink.Spec.Ip.IpAddress
   250  	netconfig.vmk.Spec.Ip.SubnetMask = netconfig.uplink.Spec.Ip.SubnetMask
   251  	netconfig.vmk.Spec.Mac = netconfig.uplink.Mac
   252  
   253  	return sh, nil
   254  }
   255  
   256  // remove destroys the container associated with the host and any volumes with labels specifying their lifecycle
   257  // is coupled with the container
   258  func (sh *simHost) remove(ctx *Context) error {
   259  	if sh == nil {
   260  		return nil
   261  	}
   262  
   263  	return sh.c.remove(ctx)
   264  }
   265  
   266  // volumeIDtoHostVolumeUUID takes the 64 char docker uuid and converts it into a 32char ESX form of 8-8-4-12
   267  // Perhaps we should do this using an md5 rehash, but instead we just take the first 32char for ease of cross-reference.
   268  func volumeIDtoHostVolumeUUID(id string) string {
   269  	return fmt.Sprintf("%s-%s-%s-%s", id[0:8], id[8:16], id[16:20], id[20:32])
   270  }
   271  
// By reference to a physical system, partition numbering tends to work out like this
// (the Partition values used in defaultSimVolumes follow this layout):
// 1. EFI System (100 MB)
// Free space (1.97 MB)
// 5. Basic Data (4 GB) (bootbank1)
// 6. Basic Data (4 GB) (bootbank2)
// 7. VMFSL (119.9 GB)  (os-data)
// 8. VMFS (1 TB)       (datastore1)
// I assume the jump from 1 -> 5 harks back to the primary/logical partitions from MBR days

// uuidToken is a placeholder that may appear in a volume name. createSimHostMounts replaces every
// occurrence with the host volume UUID derived from the docker volume created for that mount.
const uuidToken = "%__UUID__%"
   281  
   282  var defaultSimVolumes = []types.HostFileSystemMountInfo{
   283  	{
   284  		MountInfo: types.HostMountInfo{
   285  			AccessMode: "readWrite",
   286  		},
   287  		Volume: &types.HostVmfsVolume{
   288  			HostFileSystemVolume: types.HostFileSystemVolume{
   289  				Type:     "VMFS",
   290  				Name:     "datastore1",
   291  				Capacity: 1 * units.TB,
   292  			},
   293  			Extent: []types.HostScsiDiskPartition{
   294  				{
   295  					Partition: 8,
   296  				},
   297  			},
   298  		},
   299  	},
   300  	{
   301  		MountInfo: types.HostMountInfo{
   302  			AccessMode: "readWrite",
   303  		},
   304  		Volume: &types.HostVmfsVolume{
   305  			HostFileSystemVolume: types.HostFileSystemVolume{
   306  				Type:     "OTHER",
   307  				Name:     "OSDATA-%__UUID__%",
   308  				Capacity: 128 * units.GB,
   309  			},
   310  			Extent: []types.HostScsiDiskPartition{
   311  				{
   312  					Partition: 7,
   313  				},
   314  			},
   315  		},
   316  	},
   317  	{
   318  		MountInfo: types.HostMountInfo{
   319  			AccessMode: "readOnly",
   320  		},
   321  		Volume: &types.HostVfatVolume{
   322  			HostFileSystemVolume: types.HostFileSystemVolume{
   323  				Type:     "OTHER",
   324  				Name:     "BOOTBANK1",
   325  				Capacity: 4 * units.GB,
   326  			},
   327  		},
   328  	},
   329  	{
   330  		MountInfo: types.HostMountInfo{
   331  			AccessMode: "readOnly",
   332  		},
   333  		Volume: &types.HostVfatVolume{
   334  			HostFileSystemVolume: types.HostFileSystemVolume{
   335  				Type:     "OTHER",
   336  				Name:     "BOOTBANK2",
   337  				Capacity: 4 * units.GB,
   338  			},
   339  		},
   340  	},
   341  }