github.com/vmware/govmomi@v0.37.2/simulator/container_host_system.go (about)

     1  /*
     2  Copyright (c) 2023-2023 VMware, Inc. All Rights Reserved.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package simulator
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  
    23  	"github.com/vmware/govmomi/units"
    24  	"github.com/vmware/govmomi/vim25/methods"
    25  	"github.com/vmware/govmomi/vim25/types"
    26  )
    27  
// Advanced-option keys (and defaults) used to wire a simulated host to its
// docker backing. See createSimulationHost for how they are consumed.
const (
	advOptPrefixPnicToUnderlayPrefix = "RUN.underlay." // prefix + pNIC device name -> name of the underlay bridge network for that pNIC
	advOptContainerBackingImage      = "RUN.container" // presence of this option opts a host into container backing
	defaultUnderlayBridgeName        = "vcsim-underlay" // NOTE(review): not referenced in this file - presumably the fallback bridge name used elsewhere; confirm
)
    33  
// simHost is the binding between a simulated HostSystem and the docker
// container backing it. It is created by createSimulationHost and may be nil
// when no container backing was requested; methods tolerate a nil receiver.
type simHost struct {
	host *HostSystem // the host system this container is backing
	c    *container  // the docker container providing networks, volumes and exec environment
}
    38  
    39  // createSimHostMounts iterates over the provide filesystem mount info, creating docker volumes. It does _not_ delete volumes
    40  // already created if creation of one fails.
    41  // Returns:
    42  // volume mounts: mount options suitable to pass directly to docker
    43  // exec commands: a set of commands to run in the sim host after creation
    44  // error: if construction of the above outputs fails
    45  func createSimHostMounts(ctx *Context, containerName string, mounts []types.HostFileSystemMountInfo) ([]string, [][]string, error) {
    46  	var dockerVol []string
    47  	var symlinkCmds [][]string
    48  
    49  	for i := range mounts {
    50  		info := &mounts[i]
    51  		name := info.Volume.GetHostFileSystemVolume().Name
    52  
    53  		// NOTE: if we ever need persistence cross-invocation we can look at encoding the disk info as a label
    54  		labels := []string{"name=" + name, "container=" + containerName, deleteWithContainer}
    55  		dockerUuid, err := createVolume("", labels, nil)
    56  		if err != nil {
    57  			return nil, nil, err
    58  		}
    59  
    60  		uuid := volumeIDtoHostVolumeUUID(dockerUuid)
    61  		name = strings.Replace(name, uuidToken, uuid, -1)
    62  
    63  		switch vol := info.Volume.(type) {
    64  		case *types.HostVmfsVolume:
    65  			vol.BlockSizeMb = 1
    66  			vol.BlockSize = units.KB
    67  			vol.UnmapGranularity = units.KB
    68  			vol.UnmapPriority = "low"
    69  			vol.MajorVersion = 6
    70  			vol.Version = "6.82"
    71  			vol.Uuid = uuid
    72  			vol.HostFileSystemVolume.Name = name
    73  			for e := range vol.Extent {
    74  				vol.Extent[e].DiskName = "____simulated_volume_____"
    75  				if vol.Extent[e].Partition == 0 {
    76  					// HACK: this should be unique within the diskname, but for now this will suffice
    77  					//  partitions start at 1
    78  					vol.Extent[e].Partition = int32(e + 1)
    79  				}
    80  			}
    81  			vol.Ssd = types.NewBool(true)
    82  			vol.Local = types.NewBool(true)
    83  		case *types.HostVfatVolume:
    84  			vol.HostFileSystemVolume.Name = name
    85  		}
    86  
    87  		info.VStorageSupport = "vStorageUnsupported"
    88  
    89  		info.MountInfo.Path = "/vmfs/volumes/" + uuid
    90  		info.MountInfo.Mounted = types.NewBool(true)
    91  		info.MountInfo.Accessible = types.NewBool(true)
    92  		if info.MountInfo.AccessMode == "" {
    93  			info.MountInfo.AccessMode = "readWrite"
    94  		}
    95  
    96  		opt := "rw"
    97  		if info.MountInfo.AccessMode == "readOnly" {
    98  			opt = "ro"
    99  		}
   100  
   101  		dockerVol = append(dockerVol, fmt.Sprintf("%s:/vmfs/volumes/%s:%s", dockerUuid, uuid, opt))
   102  
   103  		// create symlinks from /vmfs/volumes/ for the Volume Name - the direct mount (path) is only the uuid
   104  		// ? can we do this via a script in the ESX image instead of via exec?
   105  		// ? are the volume names exposed in any manner inside the host? They must be because these mounts exist but where does that come from? Chicken and egg problem? ConfigStore?
   106  		symlinkCmds = append(symlinkCmds, []string{"ln", "-s", fmt.Sprintf("/vmfs/volumes/%s", uuid), fmt.Sprintf("/vmfs/volumes/%s", name)})
   107  		if strings.HasPrefix(name, "OSDATA") {
   108  			symlinkCmds = append(symlinkCmds, []string{"mkdir", "-p", "/var/lib/vmware"})
   109  			symlinkCmds = append(symlinkCmds, []string{"ln", "-s", fmt.Sprintf("/vmfs/volumes/%s", uuid), "/var/lib/vmware/osdata"})
   110  		}
   111  	}
   112  
   113  	return dockerVol, symlinkCmds, nil
   114  }
   115  
   116  // createSimHostNetworks creates the networks for the host if not already created. Because we expect multiple hosts on the same network to act as a cluster
   117  // it's likely that only the first host will create networks.
   118  // This includes:
   119  // * bridge network per-pNIC
   120  // * bridge network per-DVS
   121  //
   122  // Returns:
   123  // * array of networks to attach to
   124  // * array of commands to run
   125  // * error
   126  //
   127  // TODO: implement bridge network per DVS - not needed until container backed VMs are "created" on container backed "hosts"
   128  func createSimHostNetworks(ctx *Context, containerName string, networkInfo *types.HostNetworkInfo, advOpts *OptionManager) ([]string, [][]string, error) {
   129  	var dockerNet []string
   130  	var cmds [][]string
   131  
   132  	existingNets := make(map[string]string)
   133  
   134  	// a pnic does not have an IP so this is purely a connectivity statement, not a network identity, however this is not how docker works
   135  	// so we're going to end up with a veth (our pnic) that does have an IP assigned. That IP will end up being used in a NetConfig structure associated
   136  	// with the pNIC. See HostSystem.getNetConfigInterface.
   137  	for i := range networkInfo.Pnic {
   138  		pnicName := networkInfo.Pnic[i].Device
   139  
   140  		bridge := getPnicUnderlay(advOpts, pnicName)
   141  
   142  		if pnic, attached := existingNets[bridge]; attached {
   143  			return nil, nil, fmt.Errorf("cannot attach multiple pNICs to the same underlay: %s and %s both attempting to connect to %s for %s", pnic, pnicName, bridge, containerName)
   144  		}
   145  
   146  		_, err := createBridge(bridge)
   147  		if err != nil {
   148  			return nil, nil, err
   149  		}
   150  
   151  		dockerNet = append(dockerNet, bridge)
   152  		existingNets[bridge] = pnicName
   153  	}
   154  
   155  	return dockerNet, cmds, nil
   156  }
   157  
   158  func getPnicUnderlay(advOpts *OptionManager, pnicName string) string {
   159  	queryRes := advOpts.QueryOptions(&types.QueryOptions{Name: advOptPrefixPnicToUnderlayPrefix + pnicName}).(*methods.QueryOptionsBody).Res
   160  	return queryRes.Returnval[0].GetOptionValue().Value.(string)
   161  }
   162  
   163  // createSimulationHostcreates a simHost binding if the host.ConfigManager.AdvancedOption set contains a key "RUN.container".
   164  // If the set does not contain that key, this returns nil.
   165  // Methods on the simHost type are written to check for nil object so the return from this call can be blindly
   166  // assigned and invoked without the caller caring about whether a binding for a backing container was warranted.
   167  //
   168  // The created simhost is based off of the details of the supplied host system.
   169  // VMFS locations are created based on FileSystemMountInfo
   170  // Bridge networks are created to simulate underlay networks - one per pNIC. You cannot connect two pNICs to the same underlay.
   171  //
   172  // On Network connectivity - initially this is using docker network constructs. This means we cannot easily use nested "ip netns" so we cannot
   173  // have a perfect representation of the ESX structure: pnic(veth)->vswtich(bridge)->{vmk,vnic}(veth)
   174  // Instead we have the following:
   175  // * bridge network per underlay - everything connects directly to the underlay
   176  // * VMs/CRXs connect to the underlay dictated by the Uplink pNIC attached to their vSwitch
   177  // * hostd vmknic gets the "host" container IP - we don't currently support multiple vmknics with different IPs
   178  // * no support for mocking VLANs
   179  func createSimulationHost(ctx *Context, host *HostSystem) (*simHost, error) {
   180  	sh := &simHost{
   181  		host: host,
   182  	}
   183  
   184  	advOpts := ctx.Map.Get(host.ConfigManager.AdvancedOption.Reference()).(*OptionManager)
   185  	fault := advOpts.QueryOptions(&types.QueryOptions{Name: "RUN.container"}).(*methods.QueryOptionsBody).Fault()
   186  	if fault != nil {
   187  		if _, ok := fault.VimFault().(*types.InvalidName); ok {
   188  			return nil, nil
   189  		}
   190  		return nil, fmt.Errorf("errror retrieving container backing from host config manager: %+v", fault.VimFault())
   191  	}
   192  
   193  	// assemble env
   194  	var dockerEnv []string
   195  
   196  	var execCmds [][]string
   197  
   198  	var err error
   199  
   200  	hName := host.Summary.Config.Name
   201  	hUuid := host.Summary.Hardware.Uuid
   202  	containerName := constructContainerName(hName, hUuid)
   203  
   204  	// create volumes and mounts
   205  	dockerVol, volCmds, err := createSimHostMounts(ctx, containerName, host.Config.FileSystemVolume.MountInfo)
   206  	if err != nil {
   207  		return nil, err
   208  	}
   209  	execCmds = append(execCmds, volCmds...)
   210  
   211  	// create networks
   212  	dockerNet, netCmds, err := createSimHostNetworks(ctx, containerName, host.Config.Network, advOpts)
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  	execCmds = append(execCmds, netCmds...)
   217  
   218  	// create the container
   219  	sh.c, err = create(ctx, hName, hUuid, dockerNet, dockerVol, nil, dockerEnv, "alpine", []string{"sleep", "infinity"})
   220  	if err != nil {
   221  		return nil, err
   222  	}
   223  
   224  	// start the container
   225  	err = sh.c.start(ctx)
   226  	if err != nil {
   227  		return nil, err
   228  	}
   229  
   230  	// run post-creation steps
   231  	for _, cmd := range execCmds {
   232  		_, err := sh.c.exec(ctx, cmd)
   233  		if err != nil {
   234  			return nil, err
   235  		}
   236  	}
   237  
   238  	_, detail, err := sh.c.inspect()
   239  	if err != nil {
   240  		return nil, err
   241  	}
   242  	for i := range host.Config.Network.Pnic {
   243  		pnic := &host.Config.Network.Pnic[i]
   244  		bridge := getPnicUnderlay(advOpts, pnic.Device)
   245  		settings := detail.NetworkSettings.Networks[bridge]
   246  
   247  		// it doesn't really make sense at an ESX level to set this information as IP bindings are associated with
   248  		// vnics (VMs) or vmknics (daemons such as hostd).
   249  		// However it's a useful location to stash this info in a manner that can be retrieved at a later date.
   250  		pnic.Spec.Ip.IpAddress = settings.IPAddress
   251  		pnic.Spec.Ip.SubnetMask = prefixToMask(settings.IPPrefixLen)
   252  
   253  		pnic.Mac = settings.MacAddress
   254  	}
   255  
   256  	// update the active "management" nicType with the container IP for vmnic0
   257  	netconfig, err := host.getNetConfigInterface(ctx, "management")
   258  	if err != nil {
   259  		return nil, err
   260  	}
   261  	netconfig.vmk.Spec.Ip.IpAddress = netconfig.uplink.Spec.Ip.IpAddress
   262  	netconfig.vmk.Spec.Ip.SubnetMask = netconfig.uplink.Spec.Ip.SubnetMask
   263  	netconfig.vmk.Spec.Mac = netconfig.uplink.Mac
   264  
   265  	return sh, nil
   266  }
   267  
   268  // remove destroys the container associated with the host and any volumes with labels specifying their lifecycle
   269  // is coupled with the container
   270  func (sh *simHost) remove(ctx *Context) error {
   271  	if sh == nil {
   272  		return nil
   273  	}
   274  
   275  	return sh.c.remove(ctx)
   276  }
   277  
   278  // volumeIDtoHostVolumeUUID takes the 64 char docker uuid and converts it into a 32char ESX form of 8-8-4-12
   279  // Perhaps we should do this using an md5 rehash, but instead we just take the first 32char for ease of cross-reference.
   280  func volumeIDtoHostVolumeUUID(id string) string {
   281  	return fmt.Sprintf("%s-%s-%s-%s", id[0:8], id[8:16], id[16:20], id[20:32])
   282  }
   283  
// By reference to physical system, partition numbering tends to work out like this:
// 1. EFI System (100 MB)
// Free space (1.97 MB)
// 5. Basic Data (4 GB) (bootbank1)
// 6. Basic Data (4 GB) (bootbank2)
// 7. VMFSL (119.9 GB)  (os-data)
// 8. VMFS (1 TB)       (datastore1)
// I assume the jump from 1 -> 5 harks back to the primary/logical partitions from MBR days
//
// uuidToken is the placeholder in default volume names (e.g. "OSDATA-%__UUID__%") that
// createSimHostMounts replaces with the ESX-form uuid derived from the backing docker volume id.
const uuidToken = "%__UUID__%"
   293  
// defaultSimVolumes is the default set of filesystem mounts given to a simulated host,
// mirroring a typical ESX install: a VMFS datastore, an OSDATA volume (name includes the
// uuidToken expanded at creation time), and two read-only bootbanks.
var defaultSimVolumes = []types.HostFileSystemMountInfo{
	{
		// datastore1 - the primary writable VMFS datastore
		MountInfo: types.HostMountInfo{
			AccessMode: "readWrite",
		},
		Volume: &types.HostVmfsVolume{
			HostFileSystemVolume: types.HostFileSystemVolume{
				Type:     "VMFS",
				Name:     "datastore1",
				Capacity: 1 * units.TB,
			},
			Extent: []types.HostScsiDiskPartition{
				{
					Partition: 8, // matches typical physical partition numbering (see comment above uuidToken)
				},
			},
		},
	},
	{
		// OSDATA - writable VMFSL-style volume; %__UUID__% is replaced by createSimHostMounts
		MountInfo: types.HostMountInfo{
			AccessMode: "readWrite",
		},
		Volume: &types.HostVmfsVolume{
			HostFileSystemVolume: types.HostFileSystemVolume{
				Type:     "OTHER",
				Name:     "OSDATA-%__UUID__%",
				Capacity: 128 * units.GB,
			},
			Extent: []types.HostScsiDiskPartition{
				{
					Partition: 7,
				},
			},
		},
	},
	{
		// BOOTBANK1 - read-only vfat boot volume
		MountInfo: types.HostMountInfo{
			AccessMode: "readOnly",
		},
		Volume: &types.HostVfatVolume{
			HostFileSystemVolume: types.HostFileSystemVolume{
				Type:     "OTHER",
				Name:     "BOOTBANK1",
				Capacity: 4 * units.GB,
			},
		},
	},
	{
		// BOOTBANK2 - read-only vfat boot volume
		MountInfo: types.HostMountInfo{
			AccessMode: "readOnly",
		},
		Volume: &types.HostVfatVolume{
			HostFileSystemVolume: types.HostFileSystemVolume{
				Type:     "OTHER",
				Name:     "BOOTBANK2",
				Capacity: 4 * units.GB,
			},
		},
	},
}