github.com/Cloud-Foundations/Dominator@v0.3.4/fleetmanager/hypervisors/update.go (about) 1 package hypervisors 2 3 import ( 4 "flag" 5 "fmt" 6 "io" 7 "net" 8 "strings" 9 "time" 10 11 "github.com/Cloud-Foundations/Dominator/fleetmanager/topology" 12 "github.com/Cloud-Foundations/Dominator/lib/constants" 13 "github.com/Cloud-Foundations/Dominator/lib/errors" 14 "github.com/Cloud-Foundations/Dominator/lib/log/prefixlogger" 15 "github.com/Cloud-Foundations/Dominator/lib/srpc" 16 "github.com/Cloud-Foundations/Dominator/lib/stringutil" 17 "github.com/Cloud-Foundations/Dominator/lib/tags" 18 fm_proto "github.com/Cloud-Foundations/Dominator/proto/fleetmanager" 19 hyper_proto "github.com/Cloud-Foundations/Dominator/proto/hypervisor" 20 ) 21 22 type addressPoolOptionsType struct { 23 desiredSize uint 24 maximumSize uint 25 minimumSize uint 26 } 27 28 var ( 29 defaultAddressPoolOptions addressPoolOptionsType 30 errorNoAccessToResource = errors.New("no access to resource") 31 manageHypervisors = flag.Bool("manageHypervisors", false, 32 "If true, manage hypervisors") 33 ) 34 35 func init() { 36 flag.UintVar(&defaultAddressPoolOptions.desiredSize, 37 "desiredAddressPoolSize", 16, 38 "Desired number of free addresses to maintain in Hypervisor") 39 flag.UintVar(&defaultAddressPoolOptions.maximumSize, 40 "maximumAddressPoolSize", 24, 41 "Maximum number of free addresses to maintain in Hypervisor") 42 flag.UintVar(&defaultAddressPoolOptions.minimumSize, 43 "minimumAddressPoolSize", 8, 44 "Minimum number of free addresses to maintain in Hypervisor") 45 } 46 47 func checkPoolLimits() error { 48 if defaultAddressPoolOptions.desiredSize < 49 defaultAddressPoolOptions.minimumSize { 50 return fmt.Errorf( 51 "desiredAddressPoolSize: %d is less than minimumAddressPoolSize: %d", 52 defaultAddressPoolOptions.desiredSize, 53 defaultAddressPoolOptions.minimumSize) 54 } 55 if defaultAddressPoolOptions.desiredSize > 56 defaultAddressPoolOptions.maximumSize { 57 return fmt.Errorf( 58 "desiredAddressPoolSize: %d is greater than maximumAddressPoolSize: %d", 59 defaultAddressPoolOptions.desiredSize, 60 defaultAddressPoolOptions.maximumSize) 61 } 62 return nil 63 } 64 65 func testInLocation(location, enclosingLocation string) bool { 66 if enclosingLocation != "" && location != enclosingLocation { 67 if len(enclosingLocation) >= len(location) { 68 return false 69 } 70 if location[len(enclosingLocation)] != '/' { 71 return false 72 } 73 if location[:len(enclosingLocation)] != enclosingLocation { 74 return false 75 } 76 } 77 return true 78 } 79 80 func (h *hypervisorType) address() string { 81 hostname := h.machine.Hostname 82 if len(h.machine.HostIpAddress) > 0 { 83 hostname = h.machine.HostIpAddress.String() 84 } 85 return fmt.Sprintf("%s:%d", hostname, constants.HypervisorPortNumber) 86 } 87 88 func (h *hypervisorType) changeOwners(client *srpc.Client) error { 89 if !*manageHypervisors { 90 return nil 91 } 92 if client == nil { 93 var err error 94 client, err = srpc.DialHTTP("tcp", h.address(), time.Second*15) 95 if err != nil { 96 return err 97 } 98 defer client.Close() 99 } 100 request := hyper_proto.ChangeOwnersRequest{ 101 OwnerGroups: h.machine.OwnerGroups, 102 OwnerUsers: h.machine.OwnerUsers, 103 } 104 var reply hyper_proto.ChangeOwnersResponse 105 err := client.RequestReply("Hypervisor.ChangeOwners", request, &reply) 106 if err != nil { 107 return err 108 } 109 return errors.New(reply.Error) 110 } 111 112 func (h *hypervisorType) checkAuth(authInfo *srpc.AuthInformation) error { 113 if authInfo.HaveMethodAccess { 114 return nil 115 } 116 if _, ok := h.ownerUsers[authInfo.Username]; ok { 117 return nil 118 } 119 for _, ownerGroup := range h.machine.OwnerGroups { 120 if _, ok := authInfo.GroupList[ownerGroup]; ok { 121 return nil 122 } 123 } 124 return errorNoAccessToResource 125 } 126 127 func (h *hypervisorType) getMachineLocked() *fm_proto.Machine { 128 machine := *h.machine 129 machine.MemoryInMiB = h.memoryInMiB 130 machine.NumCPUs = h.numCPUs 131 machine.TotalVolumeBytes = h.totalVolumeBytes 132 if len(h.localTags) < 1 { 133 return &machine 134 } 135 machine.Tags = h.machine.Tags.Copy() 136 machine.Tags.Merge(h.localTags) 137 return &machine 138 } 139 140 func (m *Manager) changeMachineTags(hostname string, 141 authInfo *srpc.AuthInformation, tgs tags.Tags) error { 142 if !*manageHypervisors { 143 return errors.New("this is a read-only Fleet Manager") 144 } 145 if h, err := m.getLockedHypervisor(hostname, true); err != nil { 146 return err 147 } else if err := h.checkAuth(authInfo); err != nil { 148 h.mutex.Unlock() 149 return err 150 } else { 151 for key, localVal := range tgs { // Delete duplicates. 152 if machineVal := h.machine.Tags[key]; localVal == machineVal { 153 delete(tgs, key) 154 } 155 } 156 err := m.storer.WriteMachineTags(h.machine.HostIpAddress, tgs) 157 if err != nil { 158 h.mutex.Unlock() 159 return err 160 } 161 if len(tgs) > 0 { 162 h.localTags = tgs 163 } else { 164 h.localTags = nil 165 } 166 update := &fm_proto.Update{ 167 ChangedMachines: []*fm_proto.Machine{h.getMachineLocked()}, 168 } 169 location := h.location 170 h.mutex.Unlock() 171 m.mutex.Lock() 172 defer m.mutex.Unlock() 173 m.sendUpdate(location, update) 174 return nil 175 } 176 } 177 178 func (h *hypervisorType) getMachine() *fm_proto.Machine { 179 h.mutex.RLock() 180 defer h.mutex.RUnlock() 181 return h.getMachineLocked() 182 } 183 184 func (m *Manager) closeUpdateChannel(channel <-chan fm_proto.Update) { 185 m.mutex.Lock() 186 defer m.mutex.Unlock() 187 if location, ok := m.notifiers[channel]; ok { 188 delete(location.notifiers, channel) 189 delete(m.notifiers, channel) 190 } 191 } 192 193 func (m *Manager) makeUpdateChannel( 194 request fm_proto.GetUpdatesRequest) <-chan fm_proto.Update { 195 channel := make(chan fm_proto.Update, 16) 196 m.mutex.Lock() 197 defer m.mutex.Unlock() 198 if !*manageHypervisors && !request.IgnoreMissingLocalTags { 199 channel <- fm_proto.Update{Error: "this is a read-only Fleet Manager"} 200 return channel 201 } 202 if m.locations == nil { 203 m.locations = make(map[string]*locationType) 204 } 205 if m.notifiers == nil { 206 m.notifiers = make(map[<-chan fm_proto.Update]*locationType) 207 } 208 location, ok := m.locations[request.Location] 209 if !ok { 210 location = &locationType{ 211 notifiers: make(map[<-chan fm_proto.Update]chan<- fm_proto.Update), 212 } 213 m.locations[request.Location] = location 214 } 215 location.notifiers[channel] = channel 216 m.notifiers[channel] = location 217 machines := make([]*fm_proto.Machine, 0) 218 vms := make(map[string]*hyper_proto.VmInfo, len(m.vms)) 219 vmToHypervisor := make(map[string]string, len(m.vms)) 220 for _, h := range m.hypervisors { 221 if !testInLocation(h.location, request.Location) { 222 continue 223 } 224 machines = append(machines, h.getMachine()) 225 for addr, vm := range h.vms { 226 vms[addr] = &vm.VmInfo 227 vmToHypervisor[addr] = h.machine.Hostname 228 } 229 } 230 channel <- fm_proto.Update{ 231 ChangedMachines: machines, 232 ChangedVMs: vms, 233 VmToHypervisor: vmToHypervisor, 234 } 235 return channel 236 } 237 238 func (m *Manager) updateHypervisor(h *hypervisorType, machine *fm_proto.Machine, 239 machineChanged bool) { 240 location, _ := m.topology.GetLocationOfMachine(machine.Hostname) 241 var numTagsToDelete uint 242 h.mutex.Lock() 243 h.location = location 244 h.machine = machine 245 h.ownerUsers = stringutil.ConvertListToMap(machine.OwnerUsers, false) 246 subnets := h.subnets 247 for key, localVal := range h.localTags { 248 if machineVal, ok := h.machine.Tags[key]; ok && localVal == machineVal { 249 delete(h.localTags, key) 250 numTagsToDelete++ 251 } 252 } 253 if numTagsToDelete > 0 { 254 err := m.storer.WriteMachineTags(h.machine.HostIpAddress, h.localTags) 255 if err != nil { 256 h.logger.Printf("error writing tags: %s\n", err) 257 } else { 258 h.logger.Debugf(0, "Deleted %d obsolete local tags\n", 259 numTagsToDelete) 260 } 261 } 262 h.mutex.Unlock() 263 if *manageHypervisors && h.probeStatus == probeStatusConnected { 264 if machineChanged { 265 go h.changeOwners(nil) 266 } 267 go m.processSubnetsUpdates(h, subnets) 268 } 269 } 270 271 func (m *Manager) updateTopology(t *topology.Topology) { 272 machines, err := t.ListMachines("") 273 if err != nil { 274 m.logger.Println(err) 275 return 276 } 277 deleteList := m.updateTopologyLocked(t, machines) 278 for _, hypervisor := range deleteList { 279 m.storer.UnregisterHypervisor(hypervisor.machine.HostIpAddress) 280 hypervisor.delete() 281 } 282 } 283 284 func (m *Manager) updateTopologyLocked(t *topology.Topology, 285 machines []*fm_proto.Machine) []*hypervisorType { 286 hypervisorsToDelete := make(map[string]struct{}, len(machines)) 287 m.mutex.Lock() 288 defer m.mutex.Unlock() 289 m.topology = t 290 for hypervisorName := range m.hypervisors { 291 hypervisorsToDelete[hypervisorName] = struct{}{} 292 } 293 var hypersToChange, hypersToDelete []*hypervisorType 294 for _, machine := range machines { 295 delete(hypervisorsToDelete, machine.Hostname) 296 if hypervisor, ok := m.hypervisors[machine.Hostname]; ok { 297 equal := hypervisor.machine.Equal(machine) 298 if !equal { 299 hypersToChange = append(hypersToChange, hypervisor) 300 } 301 m.updateHypervisor(hypervisor, machine, !equal) 302 } else { 303 location, _ := m.topology.GetLocationOfMachine(machine.Hostname) 304 hypervisor := &hypervisorType{ 305 logger: prefixlogger.New(machine.Hostname+": ", m.logger), 306 location: location, 307 machine: machine, 308 migratingVms: make(map[string]*vmInfoType), 309 ownerUsers: stringutil.ConvertListToMap(machine.OwnerUsers, 310 false), 311 vms: make(map[string]*vmInfoType), 312 } 313 m.hypervisors[machine.Hostname] = hypervisor 314 hypersToChange = append(hypersToChange, hypervisor) 315 go m.manageHypervisorLoop(hypervisor) 316 } 317 } 318 deleteList := make([]*hypervisorType, 0, len(hypervisorsToDelete)) 319 for hypervisorName := range hypervisorsToDelete { 320 hypervisor := m.hypervisors[hypervisorName] 321 deleteList = append(deleteList, hypervisor) 322 delete(m.hypervisors, hypervisorName) 323 hypersToDelete = append(hypersToDelete, hypervisor) 324 for vmIP := range hypervisor.migratingVms { 325 delete(m.vms, vmIP) 326 } 327 for vmIP := range hypervisor.vms { 328 delete(m.vms, vmIP) 329 } 330 } 331 if len(hypersToChange) > 0 || len(hypersToDelete) > 0 { 332 updates := m.splitChanges(hypersToChange, hypersToDelete) 333 for location, updateForLocation := range updates { 334 m.sendUpdate(location, updateForLocation) 335 } 336 } 337 // (Re)build map of managed subnets. 338 m.subnets = make(map[string]*subnetType, len(m.subnets)) 339 t.Walk(func(directory *topology.Directory) error { 340 for _, tSubnet := range directory.Subnets { 341 if !tSubnet.Manage { 342 continue 343 } 344 gatewayIp := tSubnet.IpGateway.String() 345 if _, ok := m.subnets[gatewayIp]; ok { 346 m.logger.Printf( 347 "ignoring duplicate subnet Id: %s gateway IP: %s\n", 348 tSubnet.Id, gatewayIp) 349 continue 350 } 351 m.subnets[gatewayIp] = m.makeSubnet(tSubnet) 352 } 353 return nil 354 }) 355 return deleteList 356 } 357 358 func (h *hypervisorType) delete() { 359 h.logger.Debugln(0, "deleting") 360 h.mutex.Lock() 361 defer h.mutex.Unlock() 362 h.deleteScheduled = true 363 select { 364 case h.closeClientChannel <- struct{}{}: 365 default: 366 } 367 } 368 369 func (h *hypervisorType) isDeleteScheduled() bool { 370 h.mutex.RLock() 371 defer h.mutex.RUnlock() 372 return h.deleteScheduled 373 } 374 375 func (m *Manager) manageHypervisorLoop(h *hypervisorType) { 376 vmList, err := m.storer.ListVMs(h.machine.HostIpAddress) 377 if err != nil { 378 h.logger.Printf("error reading VMs, not managing hypervisor: %s", err) 379 return 380 } 381 h.cachedSerialNumber, err = m.storer.ReadMachineSerialNumber( 382 h.machine.HostIpAddress) 383 if err != nil { 384 h.logger.Printf( 385 "error reading serial number, not managing hypervisor: %s", err) 386 return 387 } 388 h.serialNumber = h.cachedSerialNumber 389 h.localTags, err = m.storer.ReadMachineTags(h.machine.HostIpAddress) 390 if err != nil { 391 h.logger.Printf("error reading tags, not managing hypervisor: %s", err) 392 return 393 } 394 for _, vmIpAddr := range vmList { 395 pVmInfo, err := m.storer.ReadVm(h.machine.HostIpAddress, vmIpAddr) 396 if err != nil { 397 h.logger.Printf("error reading VM: %s: %s", vmIpAddr, err) 398 continue 399 } 400 vmInfo := &vmInfoType{vmIpAddr, *pVmInfo, h.location, h} 401 h.vms[vmIpAddr] = vmInfo 402 m.mutex.Lock() 403 m.vms[vmIpAddr] = vmInfo 404 m.mutex.Unlock() 405 } 406 for !h.isDeleteScheduled() { 407 sleepTime := m.manageHypervisor(h) 408 time.Sleep(sleepTime) 409 } 410 } 411 412 func (m *Manager) manageHypervisor(h *hypervisorType) time.Duration { 413 failureProbeStatus := probeStatusUnreachable 414 defer func() { 415 h.mutex.Lock() 416 defer h.mutex.Unlock() 417 h.closeClientChannel = nil 418 h.probeStatus = failureProbeStatus 419 }() 420 client, err := srpc.DialHTTP("tcp", h.address(), time.Second*15) 421 if err != nil { 422 h.logger.Debugln(1, err) 423 switch err { 424 case srpc.ErrorAccessToMethodDenied: 425 failureProbeStatus = probeStatusAccessDenied 426 case srpc.ErrorNoSrpcEndpoint: 427 failureProbeStatus = probeStatusNoSrpc 428 case srpc.ErrorConnectionRefused: 429 failureProbeStatus = probeStatusConnectionRefused 430 default: 431 failureProbeStatus = m.probeUnreachable(h) 432 } 433 return time.Second 434 } 435 defer client.Close() 436 if err := h.changeOwners(client); err != nil { 437 if strings.HasPrefix(err.Error(), "unknown service") { 438 h.logger.Debugln(1, err) 439 } else { 440 h.logger.Println(err) 441 } 442 } 443 conn, err := client.Call("Hypervisor.GetUpdates") 444 if err != nil { 445 if strings.HasPrefix(err.Error(), "unknown service") { 446 h.logger.Debugln(1, err) 447 failureProbeStatus = probeStatusNoService 448 return time.Minute 449 } else { 450 h.logger.Println(err) 451 } 452 return time.Second 453 } 454 h.mutex.Lock() 455 h.probeStatus = probeStatusConnected 456 if h.deleteScheduled { 457 h.mutex.Unlock() 458 conn.Close() 459 return 0 460 } 461 closeClientChannel := make(chan struct{}, 1) 462 h.closeClientChannel = closeClientChannel 463 h.receiveChannel = make(chan struct{}, 1) 464 h.mutex.Unlock() 465 go h.monitorLoop(client, conn, closeClientChannel) 466 defer close(h.receiveChannel) 467 h.logger.Debugln(0, "waiting for Update messages") 468 firstUpdate := true 469 for { 470 var update hyper_proto.Update 471 if err := conn.Decode(&update); err != nil { 472 if err == io.EOF { 473 h.logger.Debugln(0, "remote closed connection") 474 } else if !client.IsClosed() { 475 h.logger.Println(err) 476 } 477 return time.Second 478 } 479 h.receiveChannel <- struct{}{} 480 m.processHypervisorUpdate(h, update, firstUpdate) 481 firstUpdate = false 482 } 483 } 484 485 func (m *Manager) getSubnetsForMachine(h *hypervisorType) ( 486 map[string]*topology.Subnet, error) { 487 m.mutex.Lock() 488 subnetsSlice, err := m.topology.GetSubnetsForMachine(h.machine.Hostname) 489 m.mutex.Unlock() 490 if err != nil { 491 return nil, err 492 } 493 subnetsMap := make(map[string]*topology.Subnet, len(subnetsSlice)) 494 for _, subnet := range subnetsSlice { 495 subnetsMap[subnet.Id] = subnet 496 } 497 return subnetsMap, nil 498 } 499 500 func (m *Manager) processAddressPoolUpdates(h *hypervisorType, 501 update hyper_proto.Update) { 502 if update.HaveAddressPool { 503 h.logger.Debugf(1, "registered address pool size: %d\n", 504 len(update.AddressPool)) 505 addresses := make([]net.IP, 0, len(update.AddressPool)) 506 for _, address := range update.AddressPool { 507 addresses = append(addresses, address.IpAddress) 508 } 509 err := m.storer.SetIPsForHypervisor(h.machine.HostIpAddress, 510 addresses) 511 if err != nil { 512 h.logger.Println(err) 513 } 514 } 515 ipsToAdd := make([]net.IP, 0) 516 addressesToAdd := make([]hyper_proto.Address, 0) 517 maxFreeAddresses := make(map[string]uint) 518 tSubnets, err := m.getSubnetsForMachine(h) 519 if err != nil { 520 h.logger.Println(err) 521 return 522 } 523 addressPoolOptions := defaultAddressPoolOptions 524 if h.disabled { 525 addressPoolOptions.desiredSize = 0 526 addressPoolOptions.maximumSize = 0 527 addressPoolOptions.minimumSize = 0 528 } else if h.healthStatus == "marginal" || h.healthStatus == "at risk" { 529 addressPoolOptions.desiredSize = 1 530 addressPoolOptions.maximumSize = 1 531 addressPoolOptions.minimumSize = 1 532 } 533 var numAddressesToRemove uint 534 for subnetId, numFreeAddresses := range update.NumFreeAddresses { 535 tSubnet := tSubnets[subnetId] 536 if tSubnet == nil { 537 h.logger.Printf("update for missing subnet: %s\n", subnetId) 538 return 539 } 540 if !tSubnet.Manage { 541 continue 542 } 543 if numFreeAddresses < addressPoolOptions.minimumSize { 544 m.mutex.Lock() 545 freeIPs, err := m.findFreeIPs(tSubnet, 546 addressPoolOptions.desiredSize-numFreeAddresses) 547 defer m.unmarkAllocatingIPs(freeIPs) 548 m.mutex.Unlock() 549 if err != nil { 550 h.logger.Println(err) 551 return 552 } 553 if len(freeIPs) < 1 { 554 continue 555 } 556 for _, ip := range freeIPs { 557 ipsToAdd = append(ipsToAdd, ip) 558 addressesToAdd = append(addressesToAdd, hyper_proto.Address{ 559 IpAddress: ip, 560 MacAddress: fmt.Sprintf("52:54:%02x:%02x:%02x:%02x", 561 ip[0], ip[1], ip[2], ip[3]), 562 }) 563 } 564 h.logger.Debugf(0, "Adding %d addresses to subnet: %s\n", 565 len(freeIPs), subnetId) 566 } else if numFreeAddresses > addressPoolOptions.maximumSize { 567 maxFreeAddresses[subnetId] = addressPoolOptions.desiredSize 568 numAddressesToRemove += numFreeAddresses - 569 addressPoolOptions.desiredSize 570 h.logger.Debugf(0, "Removing %d excess addresses from subnet: %s\n", 571 numFreeAddresses-addressPoolOptions.maximumSize, subnetId) 572 } 573 } 574 if len(addressesToAdd) < 1 && len(maxFreeAddresses) < 1 { 575 return 576 } 577 client, err := srpc.DialHTTP("tcp", h.address(), time.Minute) 578 if err != nil { 579 h.logger.Println(err) 580 return 581 } 582 defer client.Close() 583 request := hyper_proto.ChangeAddressPoolRequest{ 584 AddressesToAdd: addressesToAdd, 585 MaximumFreeAddresses: maxFreeAddresses, 586 } 587 var reply hyper_proto.ChangeAddressPoolResponse 588 err = client.RequestReply("Hypervisor.ChangeAddressPool", 589 request, &reply) 590 if err == nil { 591 err = errors.New(reply.Error) 592 } 593 if err != nil { 594 h.logger.Println(err) 595 return 596 } 597 m.storer.AddIPsForHypervisor(h.machine.HostIpAddress, ipsToAdd) 598 if len(addressesToAdd) > 0 { 599 h.logger.Debugf(0, "replenished pool with %d addresses\n", 600 len(addressesToAdd)) 601 } 602 if len(maxFreeAddresses) > 0 { 603 h.logger.Debugf(0, "removed %d excess addresses from pool\n", 604 numAddressesToRemove) 605 } 606 } 607 608 func (m *Manager) processHypervisorUpdate(h *hypervisorType, 609 update hyper_proto.Update, firstUpdate bool) { 610 h.mutex.Lock() 611 if update.HaveDisabled { 612 h.disabled = update.Disabled 613 } 614 if update.MemoryInMiB != nil { 615 h.memoryInMiB = *update.MemoryInMiB 616 } 617 if update.NumCPUs != nil { 618 h.numCPUs = *update.NumCPUs 619 } 620 if update.TotalVolumeBytes != nil { 621 h.totalVolumeBytes = *update.TotalVolumeBytes 622 } 623 oldHealthStatus := h.healthStatus 624 h.healthStatus = update.HealthStatus 625 oldSerialNumber := h.serialNumber 626 if update.HaveSerialNumber && update.SerialNumber != "" { 627 h.serialNumber = update.SerialNumber 628 } 629 h.mutex.Unlock() 630 if !firstUpdate && update.HealthStatus != oldHealthStatus { 631 h.logger.Printf("health status changed from: \"%s\" to: \"%s\"\n", 632 oldHealthStatus, update.HealthStatus) 633 } 634 if *manageHypervisors { 635 if update.HaveSubnets { // Must do subnets first. 636 h.mutex.Lock() 637 h.subnets = update.Subnets 638 h.mutex.Unlock() 639 m.processSubnetsUpdates(h, update.Subnets) 640 } 641 m.processAddressPoolUpdates(h, update) 642 } 643 if update.HaveSerialNumber && update.SerialNumber != "" && 644 update.SerialNumber != oldSerialNumber { 645 err := m.storer.WriteMachineSerialNumber(h.machine.HostIpAddress, 646 update.SerialNumber) 647 if err != nil { 648 h.logger.Println(err) 649 } else { 650 h.mutex.Lock() 651 h.cachedSerialNumber = update.SerialNumber 652 h.mutex.Unlock() 653 } 654 } 655 if update.HaveVMs { 656 if firstUpdate { 657 m.processInitialVMs(h, update.VMs) 658 } else { 659 m.processVmUpdates(h, update.VMs) 660 } 661 } 662 } 663 664 func (m *Manager) processInitialVMs(h *hypervisorType, 665 vms map[string]*hyper_proto.VmInfo) { 666 m.mutex.Lock() 667 defer m.mutex.Unlock() 668 for ipAddr := range h.vms { 669 if _, ok := vms[ipAddr]; !ok { 670 vms[ipAddr] = nil 671 } 672 } 673 for ipAddr := range h.migratingVms { 674 if _, ok := vms[ipAddr]; !ok { 675 vms[ipAddr] = nil 676 } 677 } 678 m.processVmUpdatesWithLock(h, vms) 679 } 680 681 func (m *Manager) processSubnetsUpdates(h *hypervisorType, 682 haveSubnets []hyper_proto.Subnet) { 683 haveSubnetsMap := make(map[string]int, len(haveSubnets)) 684 for index, subnet := range haveSubnets { 685 haveSubnetsMap[subnet.Id] = index 686 } 687 t, err := m.getTopology() 688 if err != nil { 689 h.logger.Println(err) 690 return 691 } 692 needSubnets, err := t.GetSubnetsForMachine(h.machine.Hostname) 693 if err != nil { 694 h.logger.Println(err) 695 return 696 } 697 subnetsToDelete := make(map[string]struct{}, len(haveSubnets)) 698 for _, subnet := range haveSubnets { 699 subnetsToDelete[subnet.Id] = struct{}{} 700 } 701 var request hyper_proto.UpdateSubnetsRequest 702 for _, needSubnet := range needSubnets { 703 if index, ok := haveSubnetsMap[needSubnet.Id]; ok { 704 haveSubnet := haveSubnets[index] 705 delete(subnetsToDelete, haveSubnet.Id) 706 if !needSubnet.Equal(&haveSubnet) { 707 request.Change = append(request.Change, needSubnet.Subnet) 708 } 709 } else { 710 request.Add = append(request.Add, needSubnet.Subnet) 711 } 712 } 713 for subnetId := range subnetsToDelete { 714 request.Delete = append(request.Delete, subnetId) 715 } 716 if len(request.Add) < 1 && len(request.Change) < 1 && 717 len(request.Delete) < 1 { 718 return 719 } 720 client, err := srpc.DialHTTP("tcp", h.address(), time.Minute) 721 if err != nil { 722 h.logger.Println(err) 723 return 724 } 725 defer client.Close() 726 var reply hyper_proto.UpdateSubnetsResponse 727 err = client.RequestReply("Hypervisor.UpdateSubnets", request, &reply) 728 if err == nil { 729 err = errors.New(reply.Error) 730 } 731 if err != nil { 732 h.logger.Println(err) 733 return 734 } 735 h.logger.Debugf(0, "Added %d, changed %d and deleted %d subnets\n", 736 len(request.Add), len(request.Change), len(request.Delete)) 737 } 738 739 func (m *Manager) processVmUpdates(h *hypervisorType, 740 updateVMs map[string]*hyper_proto.VmInfo) { 741 for ipAddr, vm := range updateVMs { 742 if len(vm.Volumes) < 1 { 743 updateVMs[ipAddr] = nil 744 } 745 } 746 m.mutex.Lock() 747 defer m.mutex.Unlock() 748 m.processVmUpdatesWithLock(h, updateVMs) 749 } 750 751 func (m *Manager) processVmUpdatesWithLock(h *hypervisorType, 752 updateVMs map[string]*hyper_proto.VmInfo) { 753 update := fm_proto.Update{ 754 ChangedVMs: make(map[string]*hyper_proto.VmInfo), 755 VmToHypervisor: make(map[string]string), 756 } 757 vmsToDelete := make(map[string]struct{}) 758 for ipAddr, protoVm := range updateVMs { 759 if protoVm == nil { 760 if _, ok := h.migratingVms[ipAddr]; !ok { 761 vmsToDelete[ipAddr] = struct{}{} 762 } else { 763 delete(h.migratingVms, ipAddr) 764 delete(m.migratingIPs, ipAddr) 765 h.logger.Debugf(0, "forgot migrating VM: %s\n", ipAddr) 766 } 767 } else { 768 if protoVm.State == hyper_proto.StateMigrating { 769 if _, ok := h.vms[ipAddr]; ok { 770 vmsToDelete[ipAddr] = struct{}{} 771 } 772 h.migratingVms[ipAddr] = &vmInfoType{ipAddr, *protoVm, 773 h.location, h} 774 m.migratingIPs[ipAddr] = struct{}{} 775 } else if vm, ok := h.vms[ipAddr]; ok { 776 if !vm.VmInfo.Equal(protoVm) { 777 err := m.storer.WriteVm(h.machine.HostIpAddress, ipAddr, 778 *protoVm) 779 if err != nil { 780 h.logger.Printf("error writing VM: %s: %s\n", 781 ipAddr, err) 782 } else { 783 h.logger.Debugf(0, "updated VM: %s\n", ipAddr) 784 } 785 } 786 vm.VmInfo = *protoVm 787 update.ChangedVMs[ipAddr] = protoVm 788 update.VmToHypervisor[ipAddr] = h.machine.Hostname 789 } else { 790 if _, ok := h.migratingVms[ipAddr]; ok { 791 delete(h.migratingVms, ipAddr) 792 delete(m.migratingIPs, ipAddr) 793 } 794 vm := &vmInfoType{ipAddr, *protoVm, h.location, h} 795 h.vms[ipAddr] = vm 796 m.vms[ipAddr] = vm 797 err := m.storer.WriteVm(h.machine.HostIpAddress, ipAddr, 798 *protoVm) 799 if err != nil { 800 h.logger.Printf("error writing VM: %s: %s\n", ipAddr, err) 801 } else { 802 h.logger.Debugf(0, "wrote VM: %s\n", ipAddr) 803 } 804 update.ChangedVMs[ipAddr] = protoVm 805 } 806 } 807 } 808 for ipAddr := range vmsToDelete { 809 delete(h.vms, ipAddr) 810 delete(m.vms, ipAddr) 811 err := m.storer.DeleteVm(h.machine.HostIpAddress, ipAddr) 812 if err != nil { 813 h.logger.Printf("error deleting VM: %s: %s\n", ipAddr, err) 814 } else { 815 h.logger.Debugf(0, "deleted VM: %s\n", ipAddr) 816 } 817 update.DeletedVMs = append(update.DeletedVMs, ipAddr) 818 } 819 h.allocatedMilliCPUs = 0 820 h.allocatedMemory = 0 821 h.allocatedVolumeBytes = 0 822 for _, vm := range h.vms { 823 h.allocatedMilliCPUs += uint64(vm.MilliCPUs) 824 h.allocatedMemory += vm.MemoryInMiB 825 for _, volume := range vm.Volumes { 826 h.allocatedVolumeBytes += volume.Size 827 } 828 } 829 m.sendUpdate(h.location, &update) 830 } 831 832 func (m *Manager) splitChanges(hypersToChange []*hypervisorType, 833 hypersToDelete []*hypervisorType) map[string]*fm_proto.Update { 834 updates := make(map[string]*fm_proto.Update) 835 for _, h := range hypersToChange { 836 if locationUpdate, ok := updates[h.location]; !ok { 837 updates[h.location] = &fm_proto.Update{ 838 ChangedMachines: []*fm_proto.Machine{h.getMachine()}, 839 } 840 } else { 841 locationUpdate.ChangedMachines = append( 842 locationUpdate.ChangedMachines, h.getMachine()) 843 } 844 } 845 for _, h := range hypersToDelete { 846 if locationUpdate, ok := updates[h.location]; !ok { 847 updates[h.location] = &fm_proto.Update{ 848 DeletedMachines: []string{h.machine.Hostname}, 849 } 850 } else { 851 locationUpdate.DeletedMachines = append( 852 locationUpdate.DeletedMachines, h.machine.Hostname) 853 } 854 } 855 return updates 856 } 857 858 func (m *Manager) sendUpdate(hyperLocation string, update *fm_proto.Update) { 859 if len(update.ChangedMachines) < 1 && len(update.ChangedVMs) < 1 && 860 len(update.DeletedMachines) < 1 && len(update.DeletedVMs) < 1 { 861 return 862 } 863 for locationStr, location := range m.locations { 864 if !testInLocation(hyperLocation, locationStr) { 865 continue 866 } 867 for rChannel, sChannel := range location.notifiers { 868 select { 869 case sChannel <- *update: 870 default: 871 delete(location.notifiers, rChannel) 872 delete(m.notifiers, rChannel) 873 close(sChannel) 874 } 875 } 876 } 877 }