github.com/vmware/govmomi@v0.43.0/simulator/container_host_system.go

/*
Copyright (c) 2023-2023 VMware, Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package simulator

import (
	"fmt"
	"strings"

	"github.com/vmware/govmomi/units"
	"github.com/vmware/govmomi/vim25/methods"
	"github.com/vmware/govmomi/vim25/types"
)

const (
	advOptPrefixPnicToUnderlayPrefix = "RUN.underlay."
	advOptContainerBackingImage      = "RUN.container"
	defaultUnderlayBridgeName        = "vcsim-underlay"
)

type simHost struct {
	host *HostSystem
	c    *container
}

// createSimHostMounts iterates over the provided filesystem mount info, creating docker volumes. It does _not_ delete volumes
// already created if creation of one fails.
// Returns:
//
//	volume mounts: mount options suitable to pass directly to docker
//	exec commands: a set of commands to run in the sim host after creation
//	error: if construction of the above outputs fails
func createSimHostMounts(ctx *Context, containerName string, mounts []types.HostFileSystemMountInfo) ([]string, [][]string, error) {
	var dockerVol []string
	var symlinkCmds [][]string

	for i := range mounts {
		info := &mounts[i]
		name := info.Volume.GetHostFileSystemVolume().Name

		// NOTE: if we ever need persistence cross-invocation we can look at encoding the disk info as a label
		labels := []string{"name=" + name, "container=" + containerName, deleteWithContainer}
		dockerUuid, err := createVolume("", labels, nil)
		if err != nil {
			return nil, nil, err
		}

		uuid := volumeIDtoHostVolumeUUID(dockerUuid)
		name = strings.Replace(name, uuidToken, uuid, -1)

		switch vol := info.Volume.(type) {
		case *types.HostVmfsVolume:
			vol.BlockSizeMb = 1
			vol.BlockSize = units.KB
			vol.UnmapGranularity = units.KB
			vol.UnmapPriority = "low"
			vol.MajorVersion = 6
			vol.Version = "6.82"
			vol.Uuid = uuid
			vol.HostFileSystemVolume.Name = name
			for e := range vol.Extent {
				vol.Extent[e].DiskName = "____simulated_volume_____"
				if vol.Extent[e].Partition == 0 {
					// HACK: this should be unique within the diskname, but for now this will suffice
					// partitions start at 1
					vol.Extent[e].Partition = int32(e + 1)
				}
			}
			vol.Ssd = types.NewBool(true)
			vol.Local = types.NewBool(true)
		case *types.HostVfatVolume:
			vol.HostFileSystemVolume.Name = name
		}

		info.VStorageSupport = "vStorageUnsupported"

		info.MountInfo.Path = "/vmfs/volumes/" + uuid
		info.MountInfo.Mounted = types.NewBool(true)
		info.MountInfo.Accessible = types.NewBool(true)
		if info.MountInfo.AccessMode == "" {
			info.MountInfo.AccessMode = "readWrite"
		}

		opt := "rw"
		if info.MountInfo.AccessMode == "readOnly" {
			opt = "ro"
		}

		dockerVol = append(dockerVol, fmt.Sprintf("%s:/vmfs/volumes/%s:%s", dockerUuid, uuid, opt))

		// create symlinks from /vmfs/volumes/ for the Volume Name - the direct mount (path) is only the uuid
		// ? can we do this via a script in the ESX image instead of via exec?
		// ? are the volume names exposed in any manner inside the host? They must be because these mounts exist but where does that come from? Chicken and egg problem? ConfigStore?
		symlinkCmds = append(symlinkCmds, []string{"ln", "-s", fmt.Sprintf("/vmfs/volumes/%s", uuid), fmt.Sprintf("/vmfs/volumes/%s", name)})
		if strings.HasPrefix(name, "OSDATA") {
			symlinkCmds = append(symlinkCmds, []string{"mkdir", "-p", "/var/lib/vmware"})
			symlinkCmds = append(symlinkCmds, []string{"ln", "-s", fmt.Sprintf("/vmfs/volumes/%s", uuid), "/var/lib/vmware/osdata"})
		}
	}

	return dockerVol, symlinkCmds, nil
}
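
// For illustration only (hypothetical values): a mount whose volume is named "datastore1", backed by a docker
// volume with an id beginning "0123456789abcdef0123456789abcdef...", would produce a docker mount option of the form
//
//	<64-char-docker-volume-id>:/vmfs/volumes/01234567-89abcdef-0123-456789abcdef:rw
//
// and a post-creation exec command of the form
//
//	ln -s /vmfs/volumes/01234567-89abcdef-0123-456789abcdef /vmfs/volumes/datastore1
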
// createSimHostNetworks creates the networks for the host if not already created. Because we expect multiple hosts on the same network to act as a cluster
// it's likely that only the first host will create networks.
// This includes:
// * bridge network per-pNIC
// * bridge network per-DVS
//
// Returns:
// * array of networks to attach to
// * array of commands to run
// * error
//
// TODO: implement bridge network per DVS - not needed until container backed VMs are "created" on container backed "hosts"
func createSimHostNetworks(ctx *Context, containerName string, networkInfo *types.HostNetworkInfo, advOpts *OptionManager) ([]string, [][]string, error) {
	var dockerNet []string
	var cmds [][]string

	existingNets := make(map[string]string)

	// a pnic does not have an IP so this is purely a connectivity statement, not a network identity, however this is not how docker works
	// so we're going to end up with a veth (our pnic) that does have an IP assigned. That IP will end up being used in a NetConfig structure associated
	// with the pNIC. See HostSystem.getNetConfigInterface.
	for i := range networkInfo.Pnic {
		pnicName := networkInfo.Pnic[i].Device

		bridge := getPnicUnderlay(advOpts, pnicName)

		if pnic, attached := existingNets[bridge]; attached {
			return nil, nil, fmt.Errorf("cannot attach multiple pNICs to the same underlay: %s and %s both attempting to connect to %s for %s", pnic, pnicName, bridge, containerName)
		}

		_, err := createBridge(bridge)
		if err != nil {
			return nil, nil, err
		}

		dockerNet = append(dockerNet, bridge)
		existingNets[bridge] = pnicName
	}

	return dockerNet, cmds, nil
}

func getPnicUnderlay(advOpts *OptionManager, pnicName string) string {
	queryRes := advOpts.QueryOptions(&types.QueryOptions{Name: advOptPrefixPnicToUnderlayPrefix + pnicName}).(*methods.QueryOptionsBody).Res
	return queryRes.Returnval[0].GetOptionValue().Value.(string)
}
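
// For illustration only (hypothetical option value): if the host's advanced options contain
//
//	RUN.underlay.vmnic0 = "vcsim-underlay"
//
// then getPnicUnderlay(advOpts, "vmnic0") returns "vcsim-underlay", and createSimHostNetworks calls
// createBridge with that name before attaching the host container to the resulting bridge network.
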
// createSimulationHost creates a simHost binding if the host.ConfigManager.AdvancedOption set contains a key "RUN.container".
// If the set does not contain that key, this returns nil.
// Methods on the simHost type are written to check for a nil receiver so the return from this call can be blindly
// assigned and invoked without the caller caring about whether a binding for a backing container was warranted.
//
// The created simHost is based on the details of the supplied host system.
// VMFS locations are created based on FileSystemMountInfo.
// Bridge networks are created to simulate underlay networks - one per pNIC. You cannot connect two pNICs to the same underlay.
//
// On network connectivity - initially this is using docker network constructs. This means we cannot easily use nested "ip netns" so we cannot
// have a perfect representation of the ESX structure: pnic(veth)->vswitch(bridge)->{vmk,vnic}(veth)
// Instead we have the following:
// * bridge network per underlay - everything connects directly to the underlay
// * VMs/CRXs connect to the underlay dictated by the Uplink pNIC attached to their vSwitch
// * hostd vmknic gets the "host" container IP - we don't currently support multiple vmknics with different IPs
// * no support for mocking VLANs
func createSimulationHost(ctx *Context, host *HostSystem) (*simHost, error) {
	sh := &simHost{
		host: host,
	}

	advOpts := ctx.Map.Get(host.ConfigManager.AdvancedOption.Reference()).(*OptionManager)
	fault := advOpts.QueryOptions(&types.QueryOptions{Name: "RUN.container"}).(*methods.QueryOptionsBody).Fault()
	if fault != nil {
		if _, ok := fault.VimFault().(*types.InvalidName); ok {
			return nil, nil
		}
		return nil, fmt.Errorf("error retrieving container backing from host config manager: %+v", fault.VimFault())
	}

	// assemble env
	var dockerEnv []string

	var execCmds [][]string

	var err error

	hName := host.Summary.Config.Name
	hUuid := host.Summary.Hardware.Uuid
	containerName := constructContainerName(hName, hUuid)

	// create volumes and mounts
	dockerVol, volCmds, err := createSimHostMounts(ctx, containerName, host.Config.FileSystemVolume.MountInfo)
	if err != nil {
		return nil, err
	}
	execCmds = append(execCmds, volCmds...)

	// create networks
	dockerNet, netCmds, err := createSimHostNetworks(ctx, containerName, host.Config.Network, advOpts)
	if err != nil {
		return nil, err
	}
	execCmds = append(execCmds, netCmds...)

	// create the container
	sh.c, err = create(ctx, hName, hUuid, dockerNet, dockerVol, nil, dockerEnv, "alpine", []string{"sleep", "infinity"})
	if err != nil {
		return nil, err
	}

	// start the container
	err = sh.c.start(ctx)
	if err != nil {
		return nil, err
	}

	// run post-creation steps
	for _, cmd := range execCmds {
		_, err := sh.c.exec(ctx, cmd)
		if err != nil {
			return nil, err
		}
	}

	_, detail, err := sh.c.inspect()
	if err != nil {
		return nil, err
	}

	for i := range host.Config.Network.Pnic {
		pnic := &host.Config.Network.Pnic[i]
		bridge := getPnicUnderlay(advOpts, pnic.Device)
		settings := detail.NetworkSettings.Networks[bridge]

		// it doesn't really make sense at an ESX level to set this information as IP bindings are associated with
		// vnics (VMs) or vmknics (daemons such as hostd).
		// However it's a useful location to stash this info in a manner that can be retrieved at a later date.
		pnic.Spec.Ip.IpAddress = settings.IPAddress
		pnic.Spec.Ip.SubnetMask = prefixToMask(settings.IPPrefixLen)

		pnic.Mac = settings.MacAddress
	}

	// update the active "management" nicType with the container IP for vmnic0
	netconfig, err := host.getNetConfigInterface(ctx, "management")
	if err != nil {
		return nil, err
	}
	netconfig.vmk.Spec.Ip.IpAddress = netconfig.uplink.Spec.Ip.IpAddress
	netconfig.vmk.Spec.Ip.SubnetMask = netconfig.uplink.Spec.Ip.SubnetMask
	netconfig.vmk.Spec.Mac = netconfig.uplink.Mac

	return sh, nil
}
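
// A minimal sketch of the intended call pattern (hypothetical caller, not part of this file): because the
// simHost methods are nil-safe, the result can be stored and cleaned up without checking whether a backing
// container was created.
//
//	sh, err := createSimulationHost(ctx, host)
//	if err != nil {
//		return err
//	}
//	// sh is nil when the "RUN.container" advanced option is absent; remove is still safe to call.
//	defer sh.remove(ctx)
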
// remove destroys the container associated with the host and any volumes with labels specifying their lifecycle
// is coupled with the container
func (sh *simHost) remove(ctx *Context) error {
	if sh == nil {
		return nil
	}

	return sh.c.remove(ctx)
}

// volumeIDtoHostVolumeUUID takes the 64 char docker uuid and converts it into a 32 char ESX form of 8-8-4-12.
// Perhaps we should do this using an md5 rehash, but instead we just take the first 32 chars for ease of cross-reference.
func volumeIDtoHostVolumeUUID(id string) string {
	return fmt.Sprintf("%s-%s-%s-%s", id[0:8], id[8:16], id[16:20], id[20:32])
}

// By reference to a physical system, partition numbering tends to work out like this:
//
//	1. EFI System (100 MB)
//	   Free space (1.97 MB)
//	5. Basic Data (4 GB) (bootbank1)
//	6. Basic Data (4 GB) (bootbank2)
//	7. VMFSL (119.9 GB) (os-data)
//	8. VMFS (1 TB) (datastore1)
//
// I assume the jump from 1 -> 5 harks back to the primary/logical partitions from MBR days
const uuidToken = "%__UUID__%"

var defaultSimVolumes = []types.HostFileSystemMountInfo{
	{
		MountInfo: types.HostMountInfo{
			AccessMode: "readWrite",
		},
		Volume: &types.HostVmfsVolume{
			HostFileSystemVolume: types.HostFileSystemVolume{
				Type:     "VMFS",
				Name:     "datastore1",
				Capacity: 1 * units.TB,
			},
			Extent: []types.HostScsiDiskPartition{
				{
					Partition: 8,
				},
			},
		},
	},
	{
		MountInfo: types.HostMountInfo{
			AccessMode: "readWrite",
		},
		Volume: &types.HostVmfsVolume{
			HostFileSystemVolume: types.HostFileSystemVolume{
				Type:     "OTHER",
				Name:     "OSDATA-%__UUID__%",
				Capacity: 128 * units.GB,
			},
			Extent: []types.HostScsiDiskPartition{
				{
					Partition: 7,
				},
			},
		},
	},
	{
		MountInfo: types.HostMountInfo{
			AccessMode: "readOnly",
		},
		Volume: &types.HostVfatVolume{
			HostFileSystemVolume: types.HostFileSystemVolume{
				Type:     "OTHER",
				Name:     "BOOTBANK1",
				Capacity: 4 * units.GB,
			},
		},
	},
	{
		MountInfo: types.HostMountInfo{
			AccessMode: "readOnly",
		},
		Volume: &types.HostVfatVolume{
			HostFileSystemVolume: types.HostFileSystemVolume{
				Type:     "OTHER",
				Name:     "BOOTBANK2",
				Capacity: 4 * units.GB,
			},
		},
	},
}
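
// For illustration only (hypothetical docker volume id): when createSimHostMounts processes the
// "OSDATA-%__UUID__%" entry above, uuidToken is replaced with the 8-8-4-12 form of the backing docker
// volume id, e.g. "OSDATA-01234567-89abcdef-0123-456789abcdef", and that name is symlinked from both
// /vmfs/volumes/<name> and /var/lib/vmware/osdata to the uuid mount point under /vmfs/volumes.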