github.com/swiftstack/proxyfs@v0.0.0-20201223034610-5434d919416e/liveness/polling.go (about) 1 package liveness 2 3 import ( 4 "bytes" 5 "compress/gzip" 6 "container/list" 7 "encoding/binary" 8 "encoding/json" 9 "fmt" 10 "io/ioutil" 11 "math/big" 12 "net" 13 "net/http" 14 "os" 15 "reflect" 16 "regexp" 17 "time" 18 19 "github.com/swiftstack/ProxyFS/jrpcfs" 20 "github.com/swiftstack/ProxyFS/logger" 21 "github.com/swiftstack/ProxyFS/utils" 22 ) 23 24 type pingReqStruct struct { 25 JSONrpc string `json:"jsonrpc"` 26 Method string `json:"method"` 27 Params [1]jrpcfs.PingReq `json:"params"` 28 ID uint64 `json:"id"` 29 } 30 31 type pingReplyStruct struct { 32 ID uint64 `json:"id"` 33 Result jrpcfs.PingReply `json:"result"` 34 Error string `json:"error"` 35 } 36 37 type ringFilePayloadJSONDevStruct struct { 38 IP string `json:"ip"` 39 Port uint16 `json:"port"` 40 } 41 42 type ringFilePayloadJSONStruct struct { 43 Devs []*ringFilePayloadJSONDevStruct `json:"devs"` 44 } 45 46 type reconDevReportStruct struct { 47 Size int64 `json:"size"` 48 Used int64 `json:"used"` 49 } 50 51 const maxRPCReplySize = 4096 52 53 func livenessChecker() { 54 var ( 55 checkEntityList *list.List 56 durationPerCheck time.Duration 57 entitiesToCheck uint64 58 entityToCheck *list.Element 59 err error 60 livenessCheckerControlChanValue bool 61 reconEndpoint *internalReconEndpointReportStruct 62 servingPeer *internalServingPeerReportStruct 63 volume *internalVolumeReportStruct 64 volumeGroup *internalVolumeGroupReportStruct 65 ) 66 67 for { 68 globals.Lock() 69 70 if nil == globals.myObservingPeerReport { 71 // Just wait to be signaled to check again... or exit 72 globals.Unlock() 73 livenessCheckerControlChanValue = <-globals.livenessCheckerControlChan 74 if livenessCheckerControlChanValue { 75 // Just loop back and re-check globals.myObservingPeerReport 76 } else { 77 // Exit 78 globals.livenessCheckerWG.Done() 79 return 80 } 81 } else { // nil != globals.myObservingPeerReport 82 // Check to see if we are supposed to exit 83 84 select { 85 case livenessCheckerControlChanValue = <-globals.livenessCheckerControlChan: 86 if livenessCheckerControlChanValue { 87 // Just fall into checkEntityList processing 88 } else { 89 // Exit 90 globals.Unlock() 91 globals.livenessCheckerWG.Done() 92 return 93 } 94 default: 95 // Just fall into checkEntityList processing 96 } 97 98 // Compute randomly-ordered list of entities to check 99 100 checkEntityList = list.New() 101 102 for _, servingPeer = range globals.myObservingPeerReport.servingPeer { 103 _ = checkEntityList.PushBack(servingPeer) 104 105 for _, volumeGroup = range servingPeer.volumeGroup { 106 _ = checkEntityList.PushBack(volumeGroup) 107 108 for _, volume = range volumeGroup.volume { 109 _ = checkEntityList.PushBack(volume) 110 } 111 } 112 } 113 114 for _, reconEndpoint = range globals.myObservingPeerReport.reconEndpoint { 115 _ = checkEntityList.PushBack(reconEndpoint) 116 } 117 118 globals.Unlock() 119 120 utils.RandomizeList(checkEntityList) 121 122 // Compute number of entities to check & time between each check 123 // Allow for one extra time slice to hopefully get all entities checked 124 125 entitiesToCheck = uint64(checkEntityList.Len()) 126 127 durationPerCheck = globals.heartbeatDuration / time.Duration(entitiesToCheck+1) 128 129 // Perform each check spaced out by durationPerCheck 130 131 entityToCheck = checkEntityList.Front() 132 133 for nil != entityToCheck { 134 switch reflect.TypeOf(entityToCheck.Value) { 135 case reflect.TypeOf(servingPeer): 136 livenessCheckServingPeer(entityToCheck.Value.(*internalServingPeerReportStruct)) 137 case reflect.TypeOf(volumeGroup): 138 livenessCheckVolumeGroup(entityToCheck.Value.(*internalVolumeGroupReportStruct)) 139 case reflect.TypeOf(volume): 140 livenessCheckVolume(entityToCheck.Value.(*internalVolumeReportStruct)) 141 case reflect.TypeOf(reconEndpoint): 142 livenessCheckReconEndpoint(entityToCheck.Value.(*internalReconEndpointReportStruct)) 143 default: 144 err = fmt.Errorf("Unrecognized reflect.TypeOf(entityToCheck.Value): %v", reflect.TypeOf(entityToCheck.Value)) 145 panic(err) 146 } 147 148 // Delay before next entityToCheck 149 150 select { 151 case livenessCheckerControlChanValue = <-globals.livenessCheckerControlChan: 152 if livenessCheckerControlChanValue { 153 // Just finish current checks before re-checking globals.myObservingPeerReport 154 } else { 155 // Exit 156 globals.livenessCheckerWG.Done() 157 return 158 } 159 case <-time.After(durationPerCheck): 160 // Proceed to next check 161 } 162 163 // Loop back for next entityToCheck 164 165 entityToCheck = entityToCheck.Next() 166 } 167 } 168 } 169 } 170 171 func livenessCheckServingPeer(servingPeer *internalServingPeerReportStruct) { 172 var ( 173 err error 174 nextID uint64 175 pingReply pingReplyStruct 176 pingReplyBuf []byte 177 pingReplyLen int 178 pingReq pingReqStruct 179 pingReqBuf []byte 180 servingPeerState string 181 tcpAddr *net.TCPAddr 182 tcpAddrToResolve string 183 tcpConn *net.TCPConn 184 timeNow time.Time 185 ) 186 187 // Setup exit path to atomically update servingPeer (initially servingPeer.state == StateUnknown) 188 189 timeNow = time.Now() 190 servingPeerState = StateUnknown 191 192 defer func() { 193 globals.Lock() 194 servingPeer.state = servingPeerState 195 servingPeer.lastCheckTime = timeNow 196 globals.Unlock() 197 }() 198 199 // Form RpcPing message to poll servingPeer's JSONRPCServer.TCPPort with 200 201 pingReq.JSONrpc = "2.0" 202 pingReq.Method = "Server.RpcPing" 203 pingReq.Params[0].Message = "Ping at " + timeNow.Format(time.RFC3339) 204 pingReq.ID = nextID 205 206 pingReqBuf, err = json.Marshal(pingReq) 207 if nil != err { 208 err = fmt.Errorf("json.Marshal(pingReq) failed: %v", err) 209 logger.Error(err) 210 return 211 } 212 213 // Compute tcpAddr of servingPeer's JSONRPCServer.TCPPort 214 215 if servingPeer.name == globals.whoAmI { 216 tcpAddrToResolve = net.JoinHostPort(globals.myPrivateIPAddr.String(), fmt.Sprintf("%d", globals.jsonRPCServerPort)) 217 } else { 218 tcpAddrToResolve = net.JoinHostPort(globals.peersByName[servingPeer.name].privateIPAddr.String(), fmt.Sprintf("%d", globals.jsonRPCServerPort)) 219 } 220 221 tcpAddr, err = net.ResolveTCPAddr("tcp", tcpAddrToResolve) 222 223 if nil != err { 224 err = fmt.Errorf("net.ResolveTCPAddr(\"tcp\", \"%v\") failed: %v", tcpAddrToResolve, err) 225 logger.Error(err) 226 return 227 } 228 229 // Perform RpcPing... from here on, default servingPeer.state == StateDead 230 231 servingPeerState = StateDead 232 233 tcpConn, err = net.DialTCP("tcp", nil, tcpAddr) 234 if nil != err { 235 return 236 } 237 238 err = tcpConn.SetDeadline(time.Now().Add(globals.maxRequestDuration)) 239 if nil != err { 240 return 241 } 242 243 _, err = tcpConn.Write(pingReqBuf) 244 if nil != err { 245 return 246 } 247 248 pingReplyBuf = make([]byte, maxRPCReplySize) 249 250 pingReplyLen, err = tcpConn.Read(pingReplyBuf) 251 if nil != err { 252 return 253 } 254 255 err = tcpConn.Close() 256 if nil != err { 257 return 258 } 259 260 pingReplyBuf = pingReplyBuf[:pingReplyLen] 261 262 err = json.Unmarshal(pingReplyBuf, &pingReply) 263 if nil != err { 264 return 265 } 266 267 // RpcPing worked... so ensure servingPeer.state == StateAlive 268 269 servingPeerState = StateAlive 270 } 271 272 func livenessCheckVolumeGroup(volumeGroup *internalVolumeGroupReportStruct) { 273 var ( 274 volumeGroupState string 275 timeNow time.Time 276 ) 277 278 // Setup exit path to atomically update servingPeer (initially servingPeer.state == StateUnknown) 279 280 timeNow = time.Now() 281 volumeGroupState = StateUnknown 282 283 defer func() { 284 globals.Lock() 285 volumeGroup.state = volumeGroupState 286 volumeGroup.lastCheckTime = timeNow 287 globals.Unlock() 288 }() 289 290 // TODO: Implement livenessCheckVolumeGroup() 291 } 292 293 func livenessCheckVolume(volume *internalVolumeReportStruct) { 294 var ( 295 volumeState string 296 timeNow time.Time 297 ) 298 299 // Setup exit path to atomically update servingPeer (initially servingPeer.state == StateUnknown) 300 301 timeNow = time.Now() 302 volumeState = StateUnknown 303 304 defer func() { 305 globals.Lock() 306 volume.state = volumeState 307 volume.lastCheckTime = timeNow 308 globals.Unlock() 309 }() 310 311 // TODO: Implement livenessCheckVolume() 312 } 313 314 func livenessCheckReconEndpoint(reconEndpoint *internalReconEndpointReportStruct) { 315 var ( 316 bigDividend *big.Int 317 bigDivisor *big.Int 318 bigQuotient *big.Int 319 bigRemainder *big.Int 320 devUtilization uint8 321 err error 322 quotient int64 323 reconDevReport *reconDevReportStruct 324 reconDevReportSlice []*reconDevReportStruct 325 reconResp *http.Response 326 reconRespBody []byte 327 remainder int64 328 url string 329 ) 330 331 reconEndpoint.maxDiskUsagePercentage = 0 332 333 url = fmt.Sprintf("http://%s/recon/diskusage", reconEndpoint.ipAddrPort) 334 335 reconResp, err = http.Get(url) 336 if nil == err { 337 reconRespBody, err = ioutil.ReadAll(reconResp.Body) 338 if nil == err { 339 if http.StatusOK == reconResp.StatusCode { 340 reconDevReportSlice = make([]*reconDevReportStruct, 0) 341 err = json.Unmarshal(reconRespBody, &reconDevReportSlice) 342 if nil == err { 343 for _, reconDevReport = range reconDevReportSlice { 344 if (reconDevReport.Used > 0) && (reconDevReport.Size > 0) && (reconDevReport.Used <= reconDevReport.Size) { 345 bigDividend = new(big.Int).Mul(big.NewInt(100), big.NewInt(reconDevReport.Used)) 346 bigDivisor = big.NewInt(reconDevReport.Size) 347 bigQuotient = new(big.Int).Quo(bigDividend, bigDivisor) 348 bigRemainder = new(big.Int).Rem(bigDividend, bigDivisor) 349 quotient = bigQuotient.Int64() 350 remainder = bigRemainder.Int64() 351 if 0 == remainder { 352 devUtilization = uint8(quotient) 353 } else { 354 devUtilization = uint8(quotient) + 1 355 } 356 if devUtilization > reconEndpoint.maxDiskUsagePercentage { 357 reconEndpoint.maxDiskUsagePercentage = devUtilization 358 } 359 } else { 360 logger.Warnf("livenessCheckReconEndpoint() GET to %s got responseBody with unreasonable used and size values", url) 361 } 362 } 363 } else { 364 logger.WarnfWithError(err, "livenessCheckReconEndpoint() GET to %s got response.Body with invalid JSON", url) 365 } 366 } else { 367 logger.WarnfWithError(err, "livenessCheckReconEndpoint() GET to %s got bad status: %s", url, reconResp.Status) 368 } 369 } else { 370 logger.WarnfWithError(err, "livenessCheckReconEndpoint() GET to %s response.Body() read failed", url) 371 } 372 err = reconResp.Body.Close() 373 if nil != err { 374 logger.WarnfWithError(err, "livenessCheckReconEndpoint() GET to %s response.Body.Close() failed", url) 375 } 376 } else { 377 logger.WarnfWithError(err, "livenessCheckReconEndpoint() failed to issue GET to %s", url) 378 } 379 } 380 381 // computeLivenessCheckAssignments takes a list of ObservingPeer and produces a 382 // template internalLivenessReport that is to be filled in by this collection of peers. 383 // While the elements of the resultant internalLivenessReport have State, LastCheckTime, 384 // and MaxDiskUsagePercentage fields, these are ignored as they will ultimately be filled 385 // in by each ObservingPeer. The livenessCheckRedundancy is used to ensure that each 386 // ServingPeer, VolumeGroup, Volume, and ReconEndpoint is adequately covered. As every 387 // Volume is part of a VolumeGroup and every VolumeGroup is assigned to a single ServingPeer, 388 // this amounts to just dolling out the Volumes to ObervingPeers with the required 389 // livenessCheckRedundancy. Similarly, the ReconEndpoints are dolled out with this 390 // same livenessCheckRedundancy. 391 // 392 // It is a bit misleading for an ObservingPeer to report that a VolumeGroup is "alive" 393 // when not all of that VolumeGroup's Volumes have been checked. Similarly, it is a 394 // bit misleading for an ObservingPeer to report that a ServingPeer is "alive" when 395 // not all of that ServingPeer's VolumeGroups have been checked. Therefore, to get an 396 // accurate picture of that state of a VolumeGroup or ServingPeer, all results from 397 // all ObservingPeers should be consulted as a set when making any availability 398 // decision. As there is no way to check an empty VolumeGroup, there state will not 399 // be in the resultant internalLivenessReport. However, ServingPeers that have no 400 // VolumeGroups assigned will still be in the resultant internalLivenessReport. 401 func computeLivenessCheckAssignments(observingPeerNameList []string) (internalLivenessReport *internalLivenessReportStruct) { 402 var ( 403 alreadyInSwiftReconEndpointIAddrSet bool 404 curSwiftConfFileMap map[string]time.Time 405 effectiveLivenessCheckRedundancy uint64 406 effectiveLivenessCheckRedundancyIndex uint64 407 err error 408 fileInfo os.FileInfo 409 fileInfoSlice []os.FileInfo 410 fileInfoModTime time.Time 411 fileInfoName string 412 inSwiftConfFileMap bool 413 internalObservingPeerReport *internalObservingPeerReportStruct 414 internalReconEndpointReport *internalReconEndpointReportStruct 415 internalServingPeerReport *internalServingPeerReportStruct 416 internalVolumeGroupReport *internalVolumeGroupReportStruct 417 internalVolumeReport *internalVolumeReportStruct 418 matchedRingFilename bool 419 needToUpdateSwiftConfFileMap bool 420 notYetAdded bool 421 observingPeerIndex uint64 422 observingPeerName string 423 ok bool 424 prevFileInfoModTime time.Time 425 ringFileData []byte 426 ringFileName string 427 ringFileMagic []byte 428 ringFilePayload []byte 429 ringFilePayloadJSON *ringFilePayloadJSONStruct 430 ringFilePayloadJSONDev *ringFilePayloadJSONDevStruct 431 ringFilePayloadLen int32 432 ringFileReader *gzip.Reader 433 ringFileReadLen int 434 ringFileVersion uint16 435 servingPeer *peerStruct 436 servingPeerName string 437 swiftReconEndpoint string 438 swiftReconEndpointIPAddrSet map[string]struct{} 439 volumeGroup *volumeGroupStruct 440 volumeGroupName string 441 volumeName string 442 volumeToCheck *volumeStruct 443 ) 444 445 if 0 == len(observingPeerNameList) { 446 err = fmt.Errorf("computeLivenessCheckAssignments(): len(observingPeerNameList) cannot be zero") 447 panic(err) 448 } 449 450 // Determine reconEndpoints 451 452 if 0 == globals.swiftReconChecksPerConfCheck { 453 globals.swiftReconEndpointSet = make(map[string]struct{}) 454 } else { 455 if 0 == globals.swiftReconChecksUntilConfCheck { 456 // Time to potentially refresh globals.swiftConfFileMap & globals.swiftReconEndpointSet 457 458 globals.swiftReconChecksUntilConfCheck = globals.swiftReconChecksPerConfCheck 459 460 fileInfoSlice, err = ioutil.ReadDir(globals.swiftConfDir) 461 if nil != err { 462 logger.FatalfWithError(err, "Unable to read [SwiftClient]SwiftConfDir (%s)", globals.swiftConfDir) 463 } 464 465 curSwiftConfFileMap = make(map[string]time.Time) 466 467 for _, fileInfo = range fileInfoSlice { 468 fileInfoName = fileInfo.Name() 469 switch fileInfoName { 470 case "account.ring.gz": 471 matchedRingFilename = true 472 case "container.ring.gz": 473 matchedRingFilename = true 474 default: 475 matchedRingFilename, err = regexp.MatchString("^object.*\\.ring\\.gz$", fileInfoName) 476 if nil != err { 477 logger.FatalfWithError(err, "Unexpected failure calling regexp.MatchString()") 478 } 479 } 480 481 if matchedRingFilename { 482 curSwiftConfFileMap[fileInfoName] = fileInfo.ModTime() 483 } 484 } 485 486 if len(globals.swiftConfFileMap) != len(curSwiftConfFileMap) { 487 needToUpdateSwiftConfFileMap = true 488 } else { 489 needToUpdateSwiftConfFileMap = false 490 for fileInfoName, fileInfoModTime = range curSwiftConfFileMap { 491 prevFileInfoModTime, inSwiftConfFileMap = globals.swiftConfFileMap[fileInfoName] 492 if !inSwiftConfFileMap || (fileInfoModTime != prevFileInfoModTime) { 493 needToUpdateSwiftConfFileMap = true 494 } 495 } 496 } 497 498 if needToUpdateSwiftConfFileMap { 499 // We must refresh globals.swiftConfFileMap & globals.swiftReconEndpointSet 500 501 globals.swiftConfFileMap = curSwiftConfFileMap 502 503 swiftReconEndpointIPAddrSet = make(map[string]struct{}) 504 globals.swiftReconEndpointSet = make(map[string]struct{}) 505 506 for ringFileName = range globals.swiftConfFileMap { 507 ringFileData, err = ioutil.ReadFile(globals.swiftConfDir + "/" + ringFileName) 508 if nil == err { 509 ringFileReader, err = gzip.NewReader(bytes.NewReader(ringFileData)) 510 if nil == err { 511 ringFileMagic = make([]byte, 4) 512 ringFileReadLen, err = ringFileReader.Read(ringFileMagic) 513 if nil == err { 514 if ringFileReadLen == len(ringFileMagic) { 515 if bytes.Equal([]byte("R1NG"), ringFileMagic) { 516 err = binary.Read(ringFileReader, binary.BigEndian, &ringFileVersion) 517 if nil == err { 518 if 1 == ringFileVersion { 519 err = binary.Read(ringFileReader, binary.BigEndian, &ringFilePayloadLen) 520 if nil == err { 521 ringFilePayload = make([]byte, ringFilePayloadLen) 522 ringFileReadLen, err = ringFileReader.Read(ringFilePayload) 523 if nil == err { 524 if ringFileReadLen == len(ringFilePayload) { 525 ringFilePayloadJSON = &ringFilePayloadJSONStruct{} 526 err = json.Unmarshal(ringFilePayload, ringFilePayloadJSON) 527 if nil == err { 528 for _, ringFilePayloadJSONDev = range ringFilePayloadJSON.Devs { 529 if nil != ringFilePayloadJSONDev { 530 _, alreadyInSwiftReconEndpointIAddrSet = swiftReconEndpointIPAddrSet[ringFilePayloadJSONDev.IP] 531 if !alreadyInSwiftReconEndpointIAddrSet { 532 swiftReconEndpointIPAddrSet[ringFilePayloadJSONDev.IP] = struct{}{} 533 swiftReconEndpoint = fmt.Sprintf("%s:%d", ringFilePayloadJSONDev.IP, ringFilePayloadJSONDev.Port) 534 globals.swiftReconEndpointSet[swiftReconEndpoint] = struct{}{} 535 } 536 } 537 } 538 } else { 539 logger.WarnfWithError(err, "Unable to json.Unmarshal ringFilePayload from ring file %s", fileInfoName) 540 } 541 } else { 542 logger.Warnf("Misread of ringFilePayload from ring file %s", fileInfoName) 543 } 544 } else { 545 logger.WarnfWithError(err, "Unable to read ringFilePayload from ring file %s", fileInfoName) 546 } 547 } else { 548 logger.WarnfWithError(err, "Unable to read ringFilePayloadLen from ring file %s", fileInfoName) 549 } 550 } else { 551 logger.Warnf("Value of ringFileVersion unexpected from ring file %s", fileInfoName) 552 } 553 } else { 554 logger.WarnfWithError(err, "Unable to read ringFileVersion from ring file %s", fileInfoName) 555 } 556 } else { 557 logger.Warnf("Value of ringFileMagic unexpected from ring file %s", fileInfoName) 558 } 559 } else { 560 logger.Warnf("Misread of ringFileMagic from ring file %s", fileInfoName) 561 } 562 } else { 563 logger.WarnfWithError(err, "Unable to read ringFileMagic from ring file %s", fileInfoName) 564 } 565 err = ringFileReader.Close() 566 if nil != err { 567 logger.WarnfWithError(err, "Unable to close gzip.Reader from ring file %s", fileInfoName) 568 } 569 } else { 570 logger.WarnfWithError(err, "Unable to create gzip.Reader from ring file %s", fileInfoName) 571 } 572 } else { 573 logger.WarnfWithError(err, "Unable to read ring file %s", fileInfoName) 574 } 575 } 576 } 577 } else { 578 globals.swiftReconChecksUntilConfCheck-- 579 } 580 } 581 582 // Prepare fresh internalLivenessReport 583 584 internalLivenessReport = &internalLivenessReportStruct{ 585 observingPeer: make(map[string]*internalObservingPeerReportStruct), 586 } 587 588 // Adjust effectiveLivenessCheckRedundancy to be no more than len(observingPeerNameList) 589 590 if uint64(len(observingPeerNameList)) < globals.livenessCheckRedundancy { 591 effectiveLivenessCheckRedundancy = uint64(len(observingPeerNameList)) 592 } else { 593 effectiveLivenessCheckRedundancy = globals.livenessCheckRedundancy 594 } 595 596 // Iterate through observingPeerNameList effectiveLivenessCheckRedundancy times scheduling Volumes 597 598 observingPeerIndex = 0 599 600 for effectiveLivenessCheckRedundancyIndex = 0; effectiveLivenessCheckRedundancyIndex < effectiveLivenessCheckRedundancy; effectiveLivenessCheckRedundancyIndex++ { 601 for _, volumeToCheck = range globals.volumeToCheckList { 602 // Add volumeToCheck to currently indexed ObservingPeer 603 604 volumeName = volumeToCheck.name 605 volumeGroup = volumeToCheck.volumeGroup 606 volumeGroupName = volumeGroup.name 607 servingPeer = volumeGroup.peer 608 if nil == servingPeer { 609 servingPeerName = globals.whoAmI 610 } else { 611 servingPeerName = servingPeer.name 612 } 613 614 notYetAdded = true // Avoid duplicate assignments 615 616 for notYetAdded { 617 observingPeerName = observingPeerNameList[observingPeerIndex] 618 619 internalObservingPeerReport, ok = internalLivenessReport.observingPeer[observingPeerName] 620 if !ok { 621 internalObservingPeerReport = &internalObservingPeerReportStruct{ 622 name: observingPeerName, 623 servingPeer: make(map[string]*internalServingPeerReportStruct), 624 reconEndpoint: make(map[string]*internalReconEndpointReportStruct), 625 } 626 internalLivenessReport.observingPeer[observingPeerName] = internalObservingPeerReport 627 } 628 629 internalServingPeerReport, ok = internalObservingPeerReport.servingPeer[servingPeerName] 630 if !ok { 631 internalServingPeerReport = &internalServingPeerReportStruct{ 632 observingPeer: internalObservingPeerReport, 633 name: servingPeerName, 634 state: StateUnknown, 635 lastCheckTime: time.Time{}, 636 volumeGroup: make(map[string]*internalVolumeGroupReportStruct), 637 } 638 internalObservingPeerReport.servingPeer[servingPeerName] = internalServingPeerReport 639 } 640 641 internalVolumeGroupReport, ok = internalServingPeerReport.volumeGroup[volumeGroupName] 642 if !ok { 643 internalVolumeGroupReport = &internalVolumeGroupReportStruct{ 644 servingPeer: internalServingPeerReport, 645 name: volumeGroupName, 646 state: StateUnknown, 647 lastCheckTime: time.Time{}, 648 volume: make(map[string]*internalVolumeReportStruct), 649 } 650 internalServingPeerReport.volumeGroup[volumeGroupName] = internalVolumeGroupReport 651 } 652 653 _, ok = internalVolumeGroupReport.volume[volumeName] 654 655 if ok { 656 // Need to step to the next ObservingPeer because this one is already watching this Volume 657 } else { 658 // New Volume for this ObservingPeer... so add it 659 660 internalVolumeReport = &internalVolumeReportStruct{ 661 volumeGroup: internalVolumeGroupReport, 662 name: volumeName, 663 state: StateUnknown, 664 lastCheckTime: time.Time{}, 665 } 666 667 internalVolumeGroupReport.volume[volumeName] = internalVolumeReport 668 669 notYetAdded = false 670 } 671 672 // Cycle to next ObservingPeer 673 674 observingPeerIndex++ 675 if observingPeerIndex == uint64(len(observingPeerNameList)) { 676 observingPeerIndex = 0 677 } 678 } 679 } 680 } 681 682 // Iterate through observingPeerNameList effectiveLivenessCheckRedundancy times scheduling "empty" VolumeGroups 683 684 for effectiveLivenessCheckRedundancyIndex = 0; effectiveLivenessCheckRedundancyIndex < effectiveLivenessCheckRedundancy; effectiveLivenessCheckRedundancyIndex++ { 685 for volumeGroupName, servingPeerName = range globals.emptyVolumeGroupToCheckSet { 686 // Add "empty" VolumeGroup to currently indexed ObservingPeer 687 688 notYetAdded = true // Avoid duplicate assignments 689 690 for notYetAdded { 691 observingPeerName = observingPeerNameList[observingPeerIndex] 692 693 internalObservingPeerReport, ok = internalLivenessReport.observingPeer[observingPeerName] 694 if !ok { 695 internalObservingPeerReport = &internalObservingPeerReportStruct{ 696 name: observingPeerName, 697 servingPeer: make(map[string]*internalServingPeerReportStruct), 698 reconEndpoint: make(map[string]*internalReconEndpointReportStruct), 699 } 700 internalLivenessReport.observingPeer[observingPeerName] = internalObservingPeerReport 701 } 702 703 internalServingPeerReport, ok = internalObservingPeerReport.servingPeer[servingPeerName] 704 if !ok { 705 internalServingPeerReport = &internalServingPeerReportStruct{ 706 observingPeer: internalObservingPeerReport, 707 name: servingPeerName, 708 state: StateUnknown, 709 lastCheckTime: time.Time{}, 710 volumeGroup: make(map[string]*internalVolumeGroupReportStruct), 711 } 712 } 713 internalObservingPeerReport.servingPeer[servingPeerName] = internalServingPeerReport 714 715 _, ok = internalServingPeerReport.volumeGroup[volumeGroupName] 716 717 if ok { 718 // Need to step to the next ObservingPeer because this one is already watching this VolumeGroup 719 } else { 720 // New VolumeGroup for this ObservingPeer->ServingPeer... so add it 721 722 internalVolumeGroupReport = &internalVolumeGroupReportStruct{ 723 servingPeer: internalServingPeerReport, 724 name: volumeGroupName, 725 state: StateUnknown, 726 lastCheckTime: time.Time{}, 727 volume: make(map[string]*internalVolumeReportStruct), 728 } 729 730 internalServingPeerReport.volumeGroup[volumeGroupName] = internalVolumeGroupReport 731 732 notYetAdded = false 733 } 734 735 // Cycle to next ObservingPeer 736 737 observingPeerIndex++ 738 if observingPeerIndex == uint64(len(observingPeerNameList)) { 739 observingPeerIndex = 0 740 } 741 } 742 } 743 } 744 745 // Iterate through observingPeerNameList effectiveLivenessCheckRedundancy times scheduling "empty" ServingPeers 746 747 for effectiveLivenessCheckRedundancyIndex = 0; effectiveLivenessCheckRedundancyIndex < effectiveLivenessCheckRedundancy; effectiveLivenessCheckRedundancyIndex++ { 748 for servingPeerName = range globals.emptyServingPeerToCheckSet { 749 // Add "empty" ServingPeer to currently indexed ObservingPeer 750 751 notYetAdded = true // Avoid duplicate assignments 752 753 for notYetAdded { 754 observingPeerName = observingPeerNameList[observingPeerIndex] 755 756 internalObservingPeerReport, ok = internalLivenessReport.observingPeer[observingPeerName] 757 if !ok { 758 internalObservingPeerReport = &internalObservingPeerReportStruct{ 759 name: observingPeerName, 760 servingPeer: make(map[string]*internalServingPeerReportStruct), 761 reconEndpoint: make(map[string]*internalReconEndpointReportStruct), 762 } 763 internalLivenessReport.observingPeer[observingPeerName] = internalObservingPeerReport 764 } 765 766 _, ok = internalObservingPeerReport.servingPeer[servingPeerName] 767 768 if ok { 769 // Need to step to the next ObservingPeer because this one is already watching this ServingPeer 770 } else { 771 // New ServingPeer for this ObservingPeer... so add it 772 773 internalServingPeerReport = &internalServingPeerReportStruct{ 774 observingPeer: internalObservingPeerReport, 775 name: servingPeerName, 776 state: StateUnknown, 777 lastCheckTime: time.Time{}, 778 volumeGroup: make(map[string]*internalVolumeGroupReportStruct), 779 } 780 781 internalObservingPeerReport.servingPeer[servingPeerName] = internalServingPeerReport 782 783 notYetAdded = false 784 } 785 786 // Cycle to next ObservingPeer 787 788 observingPeerIndex++ 789 if observingPeerIndex == uint64(len(observingPeerNameList)) { 790 observingPeerIndex = 0 791 } 792 } 793 } 794 } 795 796 // Iterate through observingPeerNameList effectiveLivenessCheckRedundancy times scheduling ReconEndpoints 797 798 for effectiveLivenessCheckRedundancyIndex = 0; effectiveLivenessCheckRedundancyIndex < effectiveLivenessCheckRedundancy; effectiveLivenessCheckRedundancyIndex++ { 799 for swiftReconEndpoint = range globals.swiftReconEndpointSet { 800 // Add volumeToCheck to currently indexed ObservingPeer 801 802 notYetAdded = true // Avoid duplicate assignments 803 804 for notYetAdded { 805 observingPeerName = observingPeerNameList[observingPeerIndex] 806 807 internalObservingPeerReport, ok = internalLivenessReport.observingPeer[observingPeerName] 808 if !ok { 809 internalObservingPeerReport = &internalObservingPeerReportStruct{ 810 name: observingPeerName, 811 servingPeer: make(map[string]*internalServingPeerReportStruct), 812 reconEndpoint: make(map[string]*internalReconEndpointReportStruct), 813 } 814 internalLivenessReport.observingPeer[observingPeerName] = internalObservingPeerReport 815 } 816 817 _, ok = internalObservingPeerReport.reconEndpoint[swiftReconEndpoint] 818 819 if ok { 820 // Need to step to the next ObservingPeer because this one is already watching this ReconEndpoint 821 } else { 822 // New ReconEndpoint for this ObservingPeer... so add it 823 824 internalReconEndpointReport = &internalReconEndpointReportStruct{ 825 observingPeer: internalObservingPeerReport, 826 ipAddrPort: swiftReconEndpoint, 827 maxDiskUsagePercentage: 0, 828 } 829 830 internalObservingPeerReport.reconEndpoint[swiftReconEndpoint] = internalReconEndpointReport 831 832 notYetAdded = false 833 } 834 835 // Cycle to next ObservingPeer 836 837 observingPeerIndex++ 838 if observingPeerIndex == uint64(len(observingPeerNameList)) { 839 observingPeerIndex = 0 840 } 841 } 842 } 843 } 844 845 return 846 } 847 848 func mergeObservingPeerReportIntoLivenessReport(internalObservingPeerReport *internalObservingPeerReportStruct, internalLivenessReport *internalLivenessReportStruct) { 849 var ( 850 ok bool 851 ) 852 853 _, ok = internalLivenessReport.observingPeer[internalObservingPeerReport.name] 854 if ok { 855 delete(internalLivenessReport.observingPeer, internalObservingPeerReport.name) 856 } 857 858 internalLivenessReport.observingPeer[internalObservingPeerReport.name] = internalObservingPeerReport 859 } 860 861 func updateMyObservingPeerReportWhileLocked(internalObservingPeerReport *internalObservingPeerReportStruct) { 862 var ( 863 ok bool 864 reconEndpointIPAddrPort string 865 reconEndpointIPAddrPortSet map[string]struct{} 866 servingPeerName string 867 servingPeerNameSet map[string]struct{} 868 servingPeerNew *internalServingPeerReportStruct 869 servingPeerOld *internalServingPeerReportStruct 870 volumeGroupName string 871 volumeGroupNameSet map[string]struct{} 872 volumeGroupNew *internalVolumeGroupReportStruct 873 volumeGroupOld *internalVolumeGroupReportStruct 874 volumeName string 875 volumeNameSet map[string]struct{} 876 ) 877 878 if (nil == globals.myObservingPeerReport) || (nil == internalObservingPeerReport) { 879 globals.myObservingPeerReport = internalObservingPeerReport 880 return 881 } 882 883 // Remove any ServingPeers from globals.myObservingPeerReport missing from internalObservingPeerReport 884 885 servingPeerNameSet = make(map[string]struct{}) 886 887 for servingPeerName = range globals.myObservingPeerReport.servingPeer { 888 _, ok = internalObservingPeerReport.servingPeer[servingPeerName] 889 if !ok { 890 servingPeerNameSet[servingPeerName] = struct{}{} 891 } 892 } 893 894 for servingPeerName = range servingPeerNameSet { 895 delete(globals.myObservingPeerReport.servingPeer, servingPeerName) 896 } 897 898 // Add any ServingPeers from internalObservingPeerReport missing from globals.myObservingPeerReport 899 900 for servingPeerName = range internalObservingPeerReport.servingPeer { 901 _, ok = globals.myObservingPeerReport.servingPeer[servingPeerName] 902 if !ok { 903 globals.myObservingPeerReport.servingPeer[servingPeerName] = &internalServingPeerReportStruct{ 904 observingPeer: globals.myObservingPeerReport, 905 name: servingPeerName, 906 state: StateUnknown, 907 lastCheckTime: time.Time{}, 908 volumeGroup: make(map[string]*internalVolumeGroupReportStruct), 909 } 910 } 911 } 912 913 // Now loop inside each ServingPeer (must now exist in both globals.myObservingPeerReport & internalObservingPeerReport) 914 915 for servingPeerName, servingPeerOld = range globals.myObservingPeerReport.servingPeer { 916 servingPeerNew = internalObservingPeerReport.servingPeer[servingPeerName] 917 918 // Remove any VolumeGroups from servingPeerOld missing from servingPeerNew 919 920 volumeGroupNameSet = make(map[string]struct{}) 921 922 for volumeGroupName = range servingPeerOld.volumeGroup { 923 _, ok = servingPeerNew.volumeGroup[volumeGroupName] 924 if !ok { 925 volumeGroupNameSet[volumeGroupName] = struct{}{} 926 } 927 } 928 929 for volumeGroupName = range volumeGroupNameSet { 930 delete(servingPeerOld.volumeGroup, volumeGroupName) 931 } 932 933 // Add any VolumeGroups from servingPeerNew missing from servingPeerOld 934 935 for volumeGroupName = range servingPeerNew.volumeGroup { 936 _, ok = servingPeerOld.volumeGroup[volumeGroupName] 937 if !ok { 938 servingPeerOld.volumeGroup[volumeGroupName] = &internalVolumeGroupReportStruct{ 939 servingPeer: servingPeerOld, 940 name: volumeGroupName, 941 state: StateUnknown, 942 lastCheckTime: time.Time{}, 943 volume: make(map[string]*internalVolumeReportStruct), 944 } 945 } 946 } 947 948 // Now loop inside each VolumeGroup (must now exist in both servingPeerOld & servingPeerNew) 949 950 for volumeGroupName, volumeGroupOld = range servingPeerOld.volumeGroup { 951 volumeGroupNew = servingPeerNew.volumeGroup[volumeGroupName] 952 953 // Remove any Volumes from volumeGroupOld missing from volumeGroupNew 954 955 volumeNameSet = make(map[string]struct{}) 956 957 for volumeName = range volumeGroupOld.volume { 958 _, ok = volumeGroupNew.volume[volumeName] 959 if !ok { 960 volumeNameSet[volumeName] = struct{}{} 961 } 962 } 963 964 for volumeName = range volumeNameSet { 965 delete(volumeGroupOld.volume, volumeName) 966 } 967 968 // Add any Volumes from volumeGroupNew missing from VolumeGroupOld 969 970 for volumeName = range volumeGroupNew.volume { 971 _, ok = volumeGroupOld.volume[volumeName] 972 if !ok { 973 volumeGroupOld.volume[volumeName] = &internalVolumeReportStruct{ 974 volumeGroup: volumeGroupOld, 975 name: volumeName, 976 state: StateUnknown, 977 lastCheckTime: time.Time{}, 978 } 979 } 980 } 981 } 982 } 983 984 // Remove any ReconEndpoints from globals.myObservingPeerReport missing from internalObservingPeerReport 985 986 reconEndpointIPAddrPortSet = make(map[string]struct{}) 987 988 for reconEndpointIPAddrPort = range globals.myObservingPeerReport.reconEndpoint { 989 _, ok = internalObservingPeerReport.reconEndpoint[reconEndpointIPAddrPort] 990 if !ok { 991 reconEndpointIPAddrPortSet[reconEndpointIPAddrPort] = struct{}{} 992 } 993 } 994 995 for reconEndpointIPAddrPort = range reconEndpointIPAddrPortSet { 996 delete(globals.myObservingPeerReport.reconEndpoint, reconEndpointIPAddrPort) 997 } 998 999 // Add any ReconEndpoints from internalObservingPeerReport missing from globals.myObservingPeerReport 1000 1001 for reconEndpointIPAddrPort = range internalObservingPeerReport.reconEndpoint { 1002 _, ok = globals.myObservingPeerReport.reconEndpoint[reconEndpointIPAddrPort] 1003 if !ok { 1004 globals.myObservingPeerReport.reconEndpoint[reconEndpointIPAddrPort] = &internalReconEndpointReportStruct{ 1005 observingPeer: globals.myObservingPeerReport, 1006 ipAddrPort: reconEndpointIPAddrPort, 1007 maxDiskUsagePercentage: 0, 1008 } 1009 } 1010 } 1011 }