github.com/vmware/govmomi@v0.51.0/simulator/container_host_system.go (about) 1 // © Broadcom. All Rights Reserved. 2 // The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. 3 // SPDX-License-Identifier: Apache-2.0 4 5 package simulator 6 7 import ( 8 "fmt" 9 "strings" 10 11 "github.com/vmware/govmomi/units" 12 "github.com/vmware/govmomi/vim25/methods" 13 "github.com/vmware/govmomi/vim25/types" 14 ) 15 16 const ( 17 advOptPrefixPnicToUnderlayPrefix = "RUN.underlay." 18 advOptContainerBackingImage = "RUN.container" 19 defaultUnderlayBridgeName = "vcsim-underlay" 20 ) 21 22 type simHost struct { 23 host *HostSystem 24 c *container 25 } 26 27 // createSimHostMounts iterates over the provide filesystem mount info, creating docker volumes. It does _not_ delete volumes 28 // already created if creation of one fails. 29 // Returns: 30 // volume mounts: mount options suitable to pass directly to docker 31 // exec commands: a set of commands to run in the sim host after creation 32 // error: if construction of the above outputs fails 33 func createSimHostMounts(ctx *Context, containerName string, mounts []types.HostFileSystemMountInfo) ([]string, [][]string, error) { 34 var dockerVol []string 35 var symlinkCmds [][]string 36 37 for i := range mounts { 38 info := &mounts[i] 39 name := info.Volume.GetHostFileSystemVolume().Name 40 41 // NOTE: if we ever need persistence cross-invocation we can look at encoding the disk info as a label 42 labels := []string{"name=" + name, "container=" + containerName, deleteWithContainer} 43 dockerUuid, err := createVolume("", labels, nil) 44 if err != nil { 45 return nil, nil, err 46 } 47 48 uuid := volumeIDtoHostVolumeUUID(dockerUuid) 49 name = strings.Replace(name, uuidToken, uuid, -1) 50 51 switch vol := info.Volume.(type) { 52 case *types.HostVmfsVolume: 53 vol.BlockSizeMb = 1 54 vol.BlockSize = units.KB 55 vol.UnmapGranularity = units.KB 56 vol.UnmapPriority = "low" 57 vol.MajorVersion = 6 58 vol.Version = "6.82" 59 vol.Uuid = uuid 60 vol.HostFileSystemVolume.Name = name 61 for e := range vol.Extent { 62 vol.Extent[e].DiskName = "____simulated_volume_____" 63 if vol.Extent[e].Partition == 0 { 64 // HACK: this should be unique within the diskname, but for now this will suffice 65 // partitions start at 1 66 vol.Extent[e].Partition = int32(e + 1) 67 } 68 } 69 vol.Ssd = types.NewBool(true) 70 vol.Local = types.NewBool(true) 71 case *types.HostVfatVolume: 72 vol.HostFileSystemVolume.Name = name 73 } 74 75 info.VStorageSupport = "vStorageUnsupported" 76 77 info.MountInfo.Path = "/vmfs/volumes/" + uuid 78 info.MountInfo.Mounted = types.NewBool(true) 79 info.MountInfo.Accessible = types.NewBool(true) 80 if info.MountInfo.AccessMode == "" { 81 info.MountInfo.AccessMode = "readWrite" 82 } 83 84 opt := "rw" 85 if info.MountInfo.AccessMode == "readOnly" { 86 opt = "ro" 87 } 88 89 dockerVol = append(dockerVol, fmt.Sprintf("%s:/vmfs/volumes/%s:%s", dockerUuid, uuid, opt)) 90 91 // create symlinks from /vmfs/volumes/ for the Volume Name - the direct mount (path) is only the uuid 92 // ? can we do this via a script in the ESX image instead of via exec? 93 // ? are the volume names exposed in any manner inside the host? They must be because these mounts exist but where does that come from? Chicken and egg problem? ConfigStore? 94 symlinkCmds = append(symlinkCmds, []string{"ln", "-s", fmt.Sprintf("/vmfs/volumes/%s", uuid), fmt.Sprintf("/vmfs/volumes/%s", name)}) 95 if strings.HasPrefix(name, "OSDATA") { 96 symlinkCmds = append(symlinkCmds, []string{"mkdir", "-p", "/var/lib/vmware"}) 97 symlinkCmds = append(symlinkCmds, []string{"ln", "-s", fmt.Sprintf("/vmfs/volumes/%s", uuid), "/var/lib/vmware/osdata"}) 98 } 99 } 100 101 return dockerVol, symlinkCmds, nil 102 } 103 104 // createSimHostNetworks creates the networks for the host if not already created. Because we expect multiple hosts on the same network to act as a cluster 105 // it's likely that only the first host will create networks. 106 // This includes: 107 // * bridge network per-pNIC 108 // * bridge network per-DVS 109 // 110 // Returns: 111 // * array of networks to attach to 112 // * array of commands to run 113 // * error 114 // 115 // TODO: implement bridge network per DVS - not needed until container backed VMs are "created" on container backed "hosts" 116 func createSimHostNetworks(ctx *Context, containerName string, networkInfo *types.HostNetworkInfo, advOpts *OptionManager) ([]string, [][]string, error) { 117 var dockerNet []string 118 var cmds [][]string 119 120 existingNets := make(map[string]string) 121 122 // a pnic does not have an IP so this is purely a connectivity statement, not a network identity, however this is not how docker works 123 // so we're going to end up with a veth (our pnic) that does have an IP assigned. That IP will end up being used in a NetConfig structure associated 124 // with the pNIC. See HostSystem.getNetConfigInterface. 125 for i := range networkInfo.Pnic { 126 pnicName := networkInfo.Pnic[i].Device 127 128 bridge := getPnicUnderlay(advOpts, pnicName) 129 130 if pnic, attached := existingNets[bridge]; attached { 131 return nil, nil, fmt.Errorf("cannot attach multiple pNICs to the same underlay: %s and %s both attempting to connect to %s for %s", pnic, pnicName, bridge, containerName) 132 } 133 134 _, err := createBridge(bridge) 135 if err != nil { 136 return nil, nil, err 137 } 138 139 dockerNet = append(dockerNet, bridge) 140 existingNets[bridge] = pnicName 141 } 142 143 return dockerNet, cmds, nil 144 } 145 146 func getPnicUnderlay(advOpts *OptionManager, pnicName string) string { 147 queryRes := advOpts.QueryOptions(&types.QueryOptions{Name: advOptPrefixPnicToUnderlayPrefix + pnicName}).(*methods.QueryOptionsBody).Res 148 return queryRes.Returnval[0].GetOptionValue().Value.(string) 149 } 150 151 // createSimulationHostcreates a simHost binding if the host.ConfigManager.AdvancedOption set contains a key "RUN.container". 152 // If the set does not contain that key, this returns nil. 153 // Methods on the simHost type are written to check for nil object so the return from this call can be blindly 154 // assigned and invoked without the caller caring about whether a binding for a backing container was warranted. 155 // 156 // The created simhost is based off of the details of the supplied host system. 157 // VMFS locations are created based on FileSystemMountInfo 158 // Bridge networks are created to simulate underlay networks - one per pNIC. You cannot connect two pNICs to the same underlay. 159 // 160 // On Network connectivity - initially this is using docker network constructs. This means we cannot easily use nested "ip netns" so we cannot 161 // have a perfect representation of the ESX structure: pnic(veth)->vswtich(bridge)->{vmk,vnic}(veth) 162 // Instead we have the following: 163 // * bridge network per underlay - everything connects directly to the underlay 164 // * VMs/CRXs connect to the underlay dictated by the Uplink pNIC attached to their vSwitch 165 // * hostd vmknic gets the "host" container IP - we don't currently support multiple vmknics with different IPs 166 // * no support for mocking VLANs 167 func createSimulationHost(ctx *Context, host *HostSystem) (*simHost, error) { 168 sh := &simHost{ 169 host: host, 170 } 171 172 advOpts := ctx.Map.Get(host.ConfigManager.AdvancedOption.Reference()).(*OptionManager) 173 fault := advOpts.QueryOptions(&types.QueryOptions{Name: "RUN.container"}).(*methods.QueryOptionsBody).Fault() 174 if fault != nil { 175 if _, ok := fault.VimFault().(*types.InvalidName); ok { 176 return nil, nil 177 } 178 return nil, fmt.Errorf("errror retrieving container backing from host config manager: %+v", fault.VimFault()) 179 } 180 181 // assemble env 182 var dockerEnv []string 183 184 var execCmds [][]string 185 186 var err error 187 188 hName := host.Summary.Config.Name 189 hUuid := host.Summary.Hardware.Uuid 190 containerName := constructContainerName(hName, hUuid) 191 192 // create volumes and mounts 193 dockerVol, volCmds, err := createSimHostMounts(ctx, containerName, host.Config.FileSystemVolume.MountInfo) 194 if err != nil { 195 return nil, err 196 } 197 execCmds = append(execCmds, volCmds...) 198 199 // create networks 200 dockerNet, netCmds, err := createSimHostNetworks(ctx, containerName, host.Config.Network, advOpts) 201 if err != nil { 202 return nil, err 203 } 204 execCmds = append(execCmds, netCmds...) 205 206 // create the container 207 sh.c, err = create(ctx, hName, hUuid, dockerNet, dockerVol, nil, dockerEnv, "alpine:3.20.3", []string{"sleep", "infinity"}) 208 if err != nil { 209 return nil, err 210 } 211 212 // start the container 213 err = sh.c.start(ctx) 214 if err != nil { 215 return nil, err 216 } 217 218 // run post-creation steps 219 for _, cmd := range execCmds { 220 _, err := sh.c.exec(ctx, cmd) 221 if err != nil { 222 return nil, err 223 } 224 } 225 226 _, detail, err := sh.c.inspect() 227 if err != nil { 228 return nil, err 229 } 230 for i := range host.Config.Network.Pnic { 231 pnic := &host.Config.Network.Pnic[i] 232 bridge := getPnicUnderlay(advOpts, pnic.Device) 233 settings := detail.NetworkSettings.Networks[bridge] 234 235 // it doesn't really make sense at an ESX level to set this information as IP bindings are associated with 236 // vnics (VMs) or vmknics (daemons such as hostd). 237 // However it's a useful location to stash this info in a manner that can be retrieved at a later date. 238 pnic.Spec.Ip.IpAddress = settings.IPAddress 239 pnic.Spec.Ip.SubnetMask = prefixToMask(settings.IPPrefixLen) 240 241 pnic.Mac = settings.MacAddress 242 } 243 244 // update the active "management" nicType with the container IP for vmnic0 245 netconfig, err := host.getNetConfigInterface(ctx, "management") 246 if err != nil { 247 return nil, err 248 } 249 netconfig.vmk.Spec.Ip.IpAddress = netconfig.uplink.Spec.Ip.IpAddress 250 netconfig.vmk.Spec.Ip.SubnetMask = netconfig.uplink.Spec.Ip.SubnetMask 251 netconfig.vmk.Spec.Mac = netconfig.uplink.Mac 252 253 return sh, nil 254 } 255 256 // remove destroys the container associated with the host and any volumes with labels specifying their lifecycle 257 // is coupled with the container 258 func (sh *simHost) remove(ctx *Context) error { 259 if sh == nil { 260 return nil 261 } 262 263 return sh.c.remove(ctx) 264 } 265 266 // volumeIDtoHostVolumeUUID takes the 64 char docker uuid and converts it into a 32char ESX form of 8-8-4-12 267 // Perhaps we should do this using an md5 rehash, but instead we just take the first 32char for ease of cross-reference. 268 func volumeIDtoHostVolumeUUID(id string) string { 269 return fmt.Sprintf("%s-%s-%s-%s", id[0:8], id[8:16], id[16:20], id[20:32]) 270 } 271 272 // By reference to physical system, partition numbering tends to work out like this: 273 // 1. EFI System (100 MB) 274 // Free space (1.97 MB) 275 // 5. Basic Data (4 GB) (bootbank1) 276 // 6. Basic Data (4 GB) (bootbank2) 277 // 7. VMFSL (119.9 GB) (os-data) 278 // 8. VMFS (1 TB) (datastore1) 279 // I assume the jump from 1 -> 5 harks back to the primary/logical partitions from MBT days 280 const uuidToken = "%__UUID__%" 281 282 var defaultSimVolumes = []types.HostFileSystemMountInfo{ 283 { 284 MountInfo: types.HostMountInfo{ 285 AccessMode: "readWrite", 286 }, 287 Volume: &types.HostVmfsVolume{ 288 HostFileSystemVolume: types.HostFileSystemVolume{ 289 Type: "VMFS", 290 Name: "datastore1", 291 Capacity: 1 * units.TB, 292 }, 293 Extent: []types.HostScsiDiskPartition{ 294 { 295 Partition: 8, 296 }, 297 }, 298 }, 299 }, 300 { 301 MountInfo: types.HostMountInfo{ 302 AccessMode: "readWrite", 303 }, 304 Volume: &types.HostVmfsVolume{ 305 HostFileSystemVolume: types.HostFileSystemVolume{ 306 Type: "OTHER", 307 Name: "OSDATA-%__UUID__%", 308 Capacity: 128 * units.GB, 309 }, 310 Extent: []types.HostScsiDiskPartition{ 311 { 312 Partition: 7, 313 }, 314 }, 315 }, 316 }, 317 { 318 MountInfo: types.HostMountInfo{ 319 AccessMode: "readOnly", 320 }, 321 Volume: &types.HostVfatVolume{ 322 HostFileSystemVolume: types.HostFileSystemVolume{ 323 Type: "OTHER", 324 Name: "BOOTBANK1", 325 Capacity: 4 * units.GB, 326 }, 327 }, 328 }, 329 { 330 MountInfo: types.HostMountInfo{ 331 AccessMode: "readOnly", 332 }, 333 Volume: &types.HostVfatVolume{ 334 HostFileSystemVolume: types.HostFileSystemVolume{ 335 Type: "OTHER", 336 Name: "BOOTBANK2", 337 Capacity: 4 * units.GB, 338 }, 339 }, 340 }, 341 }