github.com/swiftstack/proxyfs@v0.0.0-20201223034610-5434d919416e/liveness/config.go (about) 1 package liveness 2 3 import ( 4 "container/list" 5 "crypto/rand" 6 "fmt" 7 "hash/crc64" 8 "math" 9 "net" 10 "sync" 11 "time" 12 13 "github.com/swiftstack/ProxyFS/conf" 14 "github.com/swiftstack/ProxyFS/inode" 15 "github.com/swiftstack/ProxyFS/logger" 16 "github.com/swiftstack/ProxyFS/trackedlock" 17 "github.com/swiftstack/ProxyFS/transitions" 18 ) 19 20 const ( 21 PrivateClusterUDPPortDefault = uint16(8123) 22 23 UDPPacketSizeMin = uint64(1000) // Presumably >> udpPacketHeaderSize 24 UDPPacketSizeMax = uint64(8000) // Presumably >> udpPacketHeaderSize 25 26 UDPPacketSendSizeDefault = uint64(1400) 27 UDPPacketRecvSizeDefault = uint64(1500) 28 29 UDPPacketCapPerMessageDefault = uint8(math.MaxUint8) 30 31 HeartBeatDurationDefault = "1s" 32 33 HeartBeatMissLimitMin = uint64(2) 34 HeartBeatMissLimitDefault = uint64(3) 35 36 MessageQueueDepthPerPeerMin = uint64(1) 37 MessageQueueDepthPerPeerDefault = uint64(4) 38 39 MaxRequestDurationDefault = "1s" 40 41 LivenessCheckRedundancyMin = uint64(1) 42 LivenessCheckRedundancyDefault = uint64(2) 43 44 LogLevelNone = uint64(0) 45 LogLevelStateChanges = uint64(1) 46 LogLevelMessages = uint64(2) 47 LogLevelMessageDetails = uint64(3) 48 LogLevelMax = uint64(4) 49 50 LogLevelDefault = LogLevelNone 51 52 DefaultSwiftReconNoWriteThreshold = 80 53 DefaultSwiftReconReadOnlyThreshold = 90 54 DefaultSwiftConfDir = "/etc/swift" 55 DefaultSwiftReconChecksPerConfCheck = 10 56 ) 57 58 type volumeStruct struct { 59 volumeGroup *volumeGroupStruct 60 name string 61 fuseMountPointName string 62 nfsExported bool 63 smbShareName string 64 accountName string 65 } 66 67 type volumeGroupStruct struct { 68 peer *peerStruct // == nil if in globals.myVolumeGroupMap 69 name string 70 virtualIPAddr net.IP // Adopts the value of peer.publicIPAddr if not provided 71 volumeMap map[string]*volumeStruct // Key == volumeStruct.name 72 } 73 74 type peerStruct struct { 75 name string 76 publicIPAddr net.IP 77 privateIPAddr net.IP 78 udpAddr *net.UDPAddr 79 curRecvMsgNonce uint64 80 curRecvPacketCount uint8 81 curRecvPacketSumSize uint64 82 curRecvPacketMap map[uint8][]byte // Key is PacketIndex 83 prevRecvMsgQueueElement *recvMsgQueueElementStruct 84 incompleteRecvMsgMap map[uint64]*recvMsgQueueElementStruct // Key == recvMsgQueueElementStruct.msgNonce 85 incompleteRecvMsgQueue *list.List // LRU ordered 86 completeRecvMsgQueue *list.List // FIFO ordered 87 volumeGroupMap map[string]*volumeGroupStruct // Key == volumeGroupStruct.name 88 } 89 90 type internalVolumeReportStruct struct { 91 volumeGroup *internalVolumeGroupReportStruct 92 name string 93 state string // One of const State{Alive|Dead|Unknown} 94 lastCheckTime time.Time 95 } 96 97 type internalVolumeGroupReportStruct struct { 98 servingPeer *internalServingPeerReportStruct 99 name string 100 state string // One of const State{Alive|Dead|Unknown} 101 lastCheckTime time.Time 102 volume map[string]*internalVolumeReportStruct // Key = internalVolumeReportStruct.name 103 } 104 105 type internalServingPeerReportStruct struct { 106 observingPeer *internalObservingPeerReportStruct 107 name string 108 state string // One of const State{Alive|Dead|Unknown} 109 lastCheckTime time.Time 110 volumeGroup map[string]*internalVolumeGroupReportStruct // Key = internalVolumeGroupReportStruct.name 111 } 112 113 type internalReconEndpointReportStruct struct { 114 observingPeer *internalObservingPeerReportStruct 115 ipAddrPort string 116 maxDiskUsagePercentage uint8 117 } 118 119 type internalObservingPeerReportStruct struct { 120 name string 121 servingPeer map[string]*internalServingPeerReportStruct // Key = internalServingPeerReportStruct.name 122 reconEndpoint map[string]*internalReconEndpointReportStruct // Key = internalReconEndpointReportStruct.ipAddrPort 123 } 124 125 type internalLivenessReportStruct struct { 126 observingPeer map[string]*internalObservingPeerReportStruct // Key = internalObservingPeerReportStruct.name 127 } 128 129 type globalsStruct struct { 130 trackedlock.Mutex 131 active bool 132 enabled bool 133 whoAmI string 134 myPublicIPAddr net.IP 135 myPrivateIPAddr net.IP 136 myUDPAddr *net.UDPAddr 137 myUDPConn *net.UDPConn 138 myVolumeGroupMap map[string]*volumeGroupStruct // Key == volumeGroupStruct.name 139 peersByName map[string]*peerStruct // Key == peerStruct.name 140 peersByTuple map[string]*peerStruct // Key == peerStruct.udpAddr.String() (~= peerStruct.tuple) 141 udpPacketSendSize uint64 142 udpPacketSendPayloadSize uint64 143 udpPacketRecvSize uint64 144 udpPacketRecvPayloadSize uint64 145 udpPacketCapPerMessage uint8 146 sendMsgMessageSizeMax uint64 147 heartbeatDuration time.Duration 148 heartbeatMissLimit uint64 149 heartbeatMissDuration time.Duration 150 messageQueueDepthPerPeer uint64 151 maxRequestDuration time.Duration 152 livenessCheckRedundancy uint64 153 logLevel uint64 154 jsonRPCServerPort uint16 155 swiftReconNoWriteThreshold uint8 156 swiftReconReadOnlyThreshold uint8 157 swiftConfDir string 158 swiftReconChecksPerConfCheck uint64 159 swiftReconChecksUntilConfCheck uint64 160 swiftConfFileMap map[string]time.Time // Key == os.FileInfo.Name(); Value == os.FileInfo.ModTime() 161 swiftReconEndpointSet map[string]struct{} // Key == IPAddrPort of ReconEndpoint 162 crc64ECMATable *crc64.Table 163 nextNonce uint64 // Randomly initialized... skips 0 164 recvMsgsDoneChan chan struct{} 165 recvMsgQueue *list.List // FIFO ordered 166 recvMsgChan chan struct{} 167 requestsByExpirationTime *list.List // FIFO ordered 168 requestsByMsgTag map[uint64]*requestStruct // Key == requestStruct.msgTag 169 requestExpirerStartChan chan struct{} // Signaled when inserting the first element of requestsByExpirationTime 170 requestExpirerStopChan chan struct{} // Signaled when asking requestExpirer() to halt 171 requestExpirerDone sync.WaitGroup // Signaled when requestExpirer() has exited 172 currentLeader *peerStruct 173 currentVote *peerStruct 174 currentTerm uint64 175 nextState func() 176 stateMachineStopChan chan struct{} 177 stateMachineDone sync.WaitGroup 178 livenessCheckerControlChan chan bool // Send true to trigger livenessChecker() to recompute polling schedule 179 // Send false to trigger livenessChecker() to exit 180 livenessCheckerWG sync.WaitGroup 181 volumeToCheckList []*volumeStruct 182 emptyVolumeGroupToCheckSet map[string]string // List (in "set" form) of VolumeGroups (by name) with no Volumes (Value == ServingPeer) 183 emptyServingPeerToCheckSet map[string]struct{} // List (in "set" form) of ServingPeers (by name) with no VolumeGroups 184 myObservingPeerReport *internalObservingPeerReportStruct 185 livenessReport *internalLivenessReportStruct 186 curRWMode inode.RWModeType 187 } 188 189 var globals globalsStruct 190 191 func init() { 192 transitions.Register("liveness", &globals) 193 } 194 195 func (dummy *globalsStruct) Up(confMap conf.ConfMap) (err error) { 196 var ( 197 u64RandBuf []byte 198 ) 199 200 // Ensure API behavior is disabled at startup 201 202 globals.active = false 203 204 // Do one-time initialization 205 206 globals.crc64ECMATable = crc64.MakeTable(crc64.ECMA) 207 208 u64RandBuf = make([]byte, 8) 209 _, err = rand.Read(u64RandBuf) 210 if nil != err { 211 err = fmt.Errorf("read.Rand() failed: %v", err) 212 return 213 } 214 globals.nextNonce = deserializeU64LittleEndian(u64RandBuf) 215 if 0 == globals.nextNonce { 216 globals.nextNonce = 1 217 } 218 219 globals.requestsByExpirationTime = list.New() 220 globals.requestsByMsgTag = make(map[uint64]*requestStruct) 221 globals.requestExpirerStartChan = make(chan struct{}, 1) 222 globals.requestExpirerStopChan = make(chan struct{}, 1) 223 224 globals.curRWMode = inode.RWModeNormal 225 inode.SetRWMode(globals.curRWMode) 226 227 globals.requestExpirerDone.Add(1) 228 go requestExpirer() 229 230 globals.livenessCheckerControlChan = make(chan bool, 1) 231 232 err = nil 233 return 234 } 235 236 func (dummy *globalsStruct) VolumeGroupCreated(confMap conf.ConfMap, volumeGroupName string, activePeer string, virtualIPAddr string) (err error) { 237 return nil 238 } 239 func (dummy *globalsStruct) VolumeGroupMoved(confMap conf.ConfMap, volumeGroupName string, activePeer string, virtualIPAddr string) (err error) { 240 return nil 241 } 242 func (dummy *globalsStruct) VolumeGroupDestroyed(confMap conf.ConfMap, volumeGroupName string) (err error) { 243 return nil 244 } 245 func (dummy *globalsStruct) VolumeCreated(confMap conf.ConfMap, volumeName string, volumeGroupName string) (err error) { 246 return nil 247 } 248 func (dummy *globalsStruct) VolumeMoved(confMap conf.ConfMap, volumeName string, volumeGroupName string) (err error) { 249 return nil 250 } 251 func (dummy *globalsStruct) VolumeDestroyed(confMap conf.ConfMap, volumeName string) (err error) { 252 return nil 253 } 254 func (dummy *globalsStruct) ServeVolume(confMap conf.ConfMap, volumeName string) (err error) { 255 return nil 256 } 257 func (dummy *globalsStruct) UnserveVolume(confMap conf.ConfMap, volumeName string) (err error) { 258 return nil 259 } 260 func (dummy *globalsStruct) VolumeToBeUnserved(confMap conf.ConfMap, volumeName string) (err error) { 261 return nil 262 } 263 264 // SignaledStart will be used to halt the cluster leadership process. This is to support 265 // SIGHUP handling incorporates all confMap changes are incorporated... not just during a restart. 266 func (dummy *globalsStruct) SignaledStart(confMap conf.ConfMap) (err error) { 267 var ( 268 stillDeactivating bool 269 ) 270 271 // If the liveness checker is not enabled, stopping it will hang 272 273 if !globals.enabled { 274 return 275 } 276 277 // Disable API behavior as we enter the SIGHUP-handling state 278 279 globals.active = false 280 281 // Stop livenessChecker() 282 283 globals.livenessCheckerControlChan <- false 284 globals.livenessCheckerWG.Wait() 285 286 // Stop state machine 287 288 globals.stateMachineStopChan <- struct{}{} 289 globals.stateMachineDone.Wait() 290 291 // Shut off recvMsgs() 292 293 err = globals.myUDPConn.Close() 294 if nil != err { 295 logger.Errorf("liveness.globals.myUDPConn.Close() failed: %v", err) 296 } 297 298 stillDeactivating = true 299 300 for stillDeactivating { 301 select { 302 case <-globals.recvMsgChan: 303 // Just discard it 304 case <-globals.recvMsgsDoneChan: 305 // Since recvMsgs() exited, we are done deactivating 306 stillDeactivating = false 307 } 308 } 309 310 // Clear out Swift recon settings and computed details 311 312 globals.swiftReconNoWriteThreshold = 101 // Never enforce NoWrite Mode 313 globals.swiftReconReadOnlyThreshold = 101 // Never enforce ReadOnly Mode 314 globals.swiftConfDir = "" 315 globals.swiftReconChecksPerConfCheck = 0 // Disabled 316 317 // Free up remaining allocated resources 318 319 globals.myVolumeGroupMap = nil 320 321 globals.peersByName = nil 322 globals.peersByTuple = nil 323 324 globals.recvMsgQueue = list.New() 325 326 globals.myObservingPeerReport = nil 327 globals.livenessReport = nil 328 329 globals.volumeToCheckList = nil 330 globals.emptyVolumeGroupToCheckSet = nil 331 globals.emptyServingPeerToCheckSet = nil 332 333 globals.myObservingPeerReport = nil 334 globals.livenessReport = nil 335 336 err = nil 337 return 338 } 339 340 // SignaledFinish will be used to kick off the cluster leadership process. This is to support 341 // SIGHUP handling incorporates all confMap changes are incorporated... not just during a restart. 342 func (dummy *globalsStruct) SignaledFinish(confMap conf.ConfMap) (err error) { 343 var ( 344 myTuple string 345 ok bool 346 enabled bool 347 peer *peerStruct 348 peerName string 349 peerList []string 350 peerTuple string 351 privateClusterUDPPortAsString string 352 privateClusterUDPPortAsUint16 uint16 353 privateIPAddr string 354 publicIPAddr string 355 virtualIPAddr string 356 volume *volumeStruct 357 volumeGroup *volumeGroupStruct 358 volumeGroupList []string 359 volumeGroupName string 360 volumeList []string 361 volumeName string 362 ) 363 364 // see if liveness checker is enabled 365 globals.enabled = false 366 enabled, err = confMap.FetchOptionValueBool("Cluster", "LivenessCheckerEnabled") 367 if nil != err { 368 logger.InfoWithError(err, "Unable to find and/or parse [Cluster]LivenessCheckerEnabled;"+ 369 " defaulting to disabled") 370 err = nil 371 return 372 } 373 if !enabled { 374 logger.Infof("Liveness checker disabled") 375 return 376 } 377 378 // don't set globals.enabled = true until it's actually started 379 logger.Infof("Liveness checker will be enabled") 380 381 // Fetch cluster parameters 382 383 privateClusterUDPPortAsUint16, err = confMap.FetchOptionValueUint16("Cluster", "PrivateClusterUDPPort") 384 if nil != err { 385 privateClusterUDPPortAsUint16 = PrivateClusterUDPPortDefault // TODO: Eventually just return 386 } 387 privateClusterUDPPortAsString = fmt.Sprintf("%d", privateClusterUDPPortAsUint16) 388 389 globals.whoAmI, err = confMap.FetchOptionValueString("Cluster", "WhoAmI") 390 if nil != err { 391 return 392 } 393 394 publicIPAddr, err = confMap.FetchOptionValueString("Peer:"+globals.whoAmI, "PublicIPAddr") 395 if nil != err { 396 return 397 } 398 399 globals.myPublicIPAddr = net.ParseIP(publicIPAddr) 400 if nil == globals.myPublicIPAddr { 401 err = fmt.Errorf("Unable to parse myPublicIPAddr") 402 return 403 } 404 405 privateIPAddr, err = confMap.FetchOptionValueString("Peer:"+globals.whoAmI, "PrivateIPAddr") 406 if nil != err { 407 return 408 } 409 410 globals.myPrivateIPAddr = net.ParseIP(privateIPAddr) 411 if nil == globals.myPrivateIPAddr { 412 err = fmt.Errorf("Unable to parse myPrivateIPAddr") 413 return 414 } 415 416 myTuple = net.JoinHostPort(privateIPAddr, privateClusterUDPPortAsString) 417 418 globals.myUDPAddr, err = net.ResolveUDPAddr("udp", myTuple) 419 if nil != err { 420 err = fmt.Errorf("Cannot parse myTuple (%s): %v", myTuple, err) 421 return 422 } 423 424 globals.myUDPConn, err = net.ListenUDP("udp", globals.myUDPAddr) 425 if nil != err { 426 err = fmt.Errorf("Cannot bind to myTuple (%v): %v", globals.myUDPAddr, err) 427 return 428 } 429 430 globals.myVolumeGroupMap = make(map[string]*volumeGroupStruct) 431 432 peerList, err = confMap.FetchOptionValueStringSlice("Cluster", "Peers") 433 if nil != err { 434 return 435 } 436 437 globals.peersByName = make(map[string]*peerStruct) 438 globals.peersByTuple = make(map[string]*peerStruct) 439 440 // Initialize emptyServingPeerToCheckSet with all ServingPeers (including self) 441 // This set will be pruned later as VolumeGroups are assigned to a ServingPeer 442 443 globals.emptyServingPeerToCheckSet = make(map[string]struct{}) 444 445 for _, peerName = range peerList { 446 globals.emptyServingPeerToCheckSet[peerName] = struct{}{} 447 448 if peerName != globals.whoAmI { 449 peer = &peerStruct{ 450 name: peerName, 451 curRecvMsgNonce: 0, 452 curRecvPacketCount: 0, 453 curRecvPacketSumSize: 0, 454 curRecvPacketMap: nil, 455 prevRecvMsgQueueElement: nil, 456 incompleteRecvMsgMap: make(map[uint64]*recvMsgQueueElementStruct), 457 incompleteRecvMsgQueue: list.New(), 458 completeRecvMsgQueue: list.New(), 459 volumeGroupMap: make(map[string]*volumeGroupStruct), 460 } 461 462 publicIPAddr, err = confMap.FetchOptionValueString("Peer:"+peerName, "PublicIPAddr") 463 if nil != err { 464 return 465 } 466 467 peer.publicIPAddr = net.ParseIP(publicIPAddr) 468 if nil == peer.publicIPAddr { 469 err = fmt.Errorf("Cannot parse [Peer:%v]PublicIPAddr", peerName) 470 return 471 } 472 473 privateIPAddr, err = confMap.FetchOptionValueString("Peer:"+peerName, "PrivateIPAddr") 474 if nil != err { 475 return 476 } 477 478 peer.privateIPAddr = net.ParseIP(privateIPAddr) 479 if nil == peer.privateIPAddr { 480 err = fmt.Errorf("Cannot parse [Peer:%v]PrivateIPAddr", peerName) 481 return 482 } 483 484 peerTuple = net.JoinHostPort(privateIPAddr, privateClusterUDPPortAsString) 485 486 peer.udpAddr, err = net.ResolveUDPAddr("udp", peerTuple) 487 if nil != err { 488 err = fmt.Errorf("Cannot parse peerTuple (%s): %v", peerTuple, err) 489 return 490 } 491 492 if globals.myUDPAddr.String() == peer.udpAddr.String() { 493 err = fmt.Errorf("peerTuple cannot match myTuple (%v)", globals.myUDPAddr) 494 return 495 } 496 _, ok = globals.peersByName[peer.name] 497 if ok { 498 err = fmt.Errorf("peerName must not match multiple peers (%v)", peer.name) 499 return 500 } 501 _, ok = globals.peersByTuple[peer.udpAddr.String()] 502 if ok { 503 err = fmt.Errorf("peerTuple must not match multiple peers (%v)", peer.udpAddr) 504 return 505 } 506 507 globals.peersByName[peer.name] = peer 508 globals.peersByTuple[peer.udpAddr.String()] = peer 509 } 510 } 511 512 globals.udpPacketSendSize, err = confMap.FetchOptionValueUint64("Cluster", "UDPPacketSendSize") 513 if nil != err { 514 globals.udpPacketSendSize = UDPPacketSendSizeDefault // TODO: Eventually just return 515 } 516 if (globals.udpPacketSendSize < UDPPacketSizeMin) || (globals.udpPacketSendSize > UDPPacketSizeMax) { 517 err = fmt.Errorf("udpPacketSendSize (%v) must be between %v and %v (inclusive)", globals.udpPacketSendSize, UDPPacketSizeMin, UDPPacketSizeMax) 518 return 519 } 520 521 globals.udpPacketSendPayloadSize = globals.udpPacketSendSize - udpPacketHeaderSize 522 523 globals.udpPacketRecvSize, err = confMap.FetchOptionValueUint64("Cluster", "UDPPacketRecvSize") 524 if nil != err { 525 globals.udpPacketRecvSize = UDPPacketRecvSizeDefault // TODO: Eventually just return 526 } 527 if (globals.udpPacketRecvSize < UDPPacketSizeMin) || (globals.udpPacketRecvSize > UDPPacketSizeMax) { 528 err = fmt.Errorf("udpPacketRecvSize (%v) must be between %v and %v (inclusive)", globals.udpPacketRecvSize, UDPPacketSizeMin, UDPPacketSizeMax) 529 return 530 } 531 532 globals.udpPacketRecvPayloadSize = globals.udpPacketRecvSize - udpPacketHeaderSize 533 534 globals.udpPacketCapPerMessage, err = confMap.FetchOptionValueUint8("Cluster", "UDPPacketCapPerMessage") 535 if nil != err { 536 globals.udpPacketCapPerMessage = UDPPacketCapPerMessageDefault // TODO: Eventually just return 537 } 538 if 0 == globals.udpPacketCapPerMessage { 539 err = fmt.Errorf("udpPacketCapPerMessage must be non-zero") 540 return 541 } 542 543 globals.sendMsgMessageSizeMax = uint64(globals.udpPacketCapPerMessage) * globals.udpPacketSendPayloadSize 544 545 globals.heartbeatDuration, err = confMap.FetchOptionValueDuration("Cluster", "HeartBeatDuration") 546 if nil != err { 547 // TODO: Eventually just return 548 globals.heartbeatDuration, err = time.ParseDuration(HeartBeatDurationDefault) 549 if nil != err { 550 return 551 } 552 } 553 if time.Duration(0) == globals.heartbeatDuration { 554 err = fmt.Errorf("heartbeatDuration must be non-zero") 555 return 556 } 557 558 globals.heartbeatMissLimit, err = confMap.FetchOptionValueUint64("Cluster", "HeartBeatMissLimit") 559 if nil != err { 560 globals.heartbeatMissLimit = HeartBeatMissLimitDefault // TODO: Eventually just return 561 } 562 if globals.heartbeatMissLimit < HeartBeatMissLimitMin { 563 err = fmt.Errorf("heartbeatMissLimit (%v) must be at least %v", globals.heartbeatMissLimit, HeartBeatMissLimitMin) 564 return 565 } 566 567 globals.heartbeatMissDuration = time.Duration(globals.heartbeatMissLimit) * globals.heartbeatDuration 568 569 globals.messageQueueDepthPerPeer, err = confMap.FetchOptionValueUint64("Cluster", "MessageQueueDepthPerPeer") 570 if nil != err { 571 globals.messageQueueDepthPerPeer = MessageQueueDepthPerPeerDefault // TODO: Eventually just return 572 } 573 if globals.messageQueueDepthPerPeer < MessageQueueDepthPerPeerMin { 574 err = fmt.Errorf("messageQueueDepthPerPeer (%v) must be at least %v", globals.messageQueueDepthPerPeer, MessageQueueDepthPerPeerMin) 575 return 576 } 577 578 globals.maxRequestDuration, err = confMap.FetchOptionValueDuration("Cluster", "MaxRequestDuration") 579 if nil != err { 580 // TODO: Eventually just return 581 globals.maxRequestDuration, err = time.ParseDuration(MaxRequestDurationDefault) 582 if nil != err { 583 return 584 } 585 } 586 if time.Duration(0) == globals.maxRequestDuration { 587 err = fmt.Errorf("maxRequestDuration must be non-zero") 588 return 589 } 590 591 globals.livenessCheckRedundancy, err = confMap.FetchOptionValueUint64("Cluster", "LivenessCheckRedundancy") 592 if nil != err { 593 globals.livenessCheckRedundancy = LivenessCheckRedundancyDefault // TODO: Eventually just return 594 } 595 if globals.livenessCheckRedundancy < LivenessCheckRedundancyMin { 596 err = fmt.Errorf("livenessCheckRedundancy (%v) must be at least %v", globals.livenessCheckRedundancy, LivenessCheckRedundancyMin) 597 return 598 } 599 600 // Set LogLevel as specified or use default 601 602 globals.logLevel, err = confMap.FetchOptionValueUint64("Cluster", "LogLevel") 603 if nil != err { 604 globals.logLevel = LogLevelDefault 605 } 606 if globals.logLevel > LogLevelMax { 607 err = fmt.Errorf("logLevel (%v) must be between 0 and %v (inclusive)", globals.logLevel, LogLevelMax) 608 return 609 } 610 611 // Record current Peer->VolumeGroup->Volume mapping 612 613 globals.volumeToCheckList = make([]*volumeStruct, 0) 614 615 volumeGroupList, err = confMap.FetchOptionValueStringSlice("FSGlobals", "VolumeGroupList") 616 if nil != err { 617 return 618 } 619 620 globals.emptyVolumeGroupToCheckSet = make(map[string]string) 621 622 for _, volumeGroupName = range volumeGroupList { 623 virtualIPAddr, err = confMap.FetchOptionValueString("VolumeGroup:"+volumeGroupName, "VirtualIPAddr") 624 if nil != err { 625 virtualIPAddr = "" 626 } 627 628 peerList, err = confMap.FetchOptionValueStringSlice("VolumeGroup:"+volumeGroupName, "PrimaryPeer") 629 if nil != err { 630 return 631 } 632 633 switch len(peerList) { 634 case 0: 635 // Since VolumeGroup has no PrimaryPeer, just skip it 636 case 1: 637 // Include this VolumeGroup 638 639 peerName = peerList[0] 640 641 delete(globals.emptyServingPeerToCheckSet, peerName) 642 643 if peerName == globals.whoAmI { 644 _, ok = globals.myVolumeGroupMap[volumeGroupName] 645 if ok { 646 err = fmt.Errorf("Duplicate VolumeGroup (%v) not allowed", volumeGroupName) 647 return 648 } 649 650 volumeGroup = &volumeGroupStruct{ 651 peer: nil, 652 name: volumeGroupName, 653 volumeMap: make(map[string]*volumeStruct), 654 } 655 656 if "" == virtualIPAddr { 657 volumeGroup.virtualIPAddr = globals.myPublicIPAddr 658 } else { 659 660 // virtualIPAddr must be a valid IP address or valid 661 // IP address in CIDR notation 662 volumeGroup.virtualIPAddr = net.ParseIP(virtualIPAddr) 663 if nil == volumeGroup.virtualIPAddr { 664 665 volumeGroup.virtualIPAddr, _, err = net.ParseCIDR(virtualIPAddr) 666 if err != nil { 667 err = fmt.Errorf("Cannot parse [VolumeGroup:%v]VirtualIPAddr: '%s' "+ 668 " as IP address or CIDR IP address: %v", 669 volumeGroupName, virtualIPAddr, err) 670 return 671 } 672 } 673 } 674 675 globals.myVolumeGroupMap[volumeGroupName] = volumeGroup 676 } else { 677 peer, ok = globals.peersByName[peerName] 678 if !ok { 679 err = fmt.Errorf("[VolumeGroup:%v]PrimaryPeer (%v) not found in [Cluster]Peers", volumeGroupName, peerName) 680 return 681 } 682 683 _, ok = peer.volumeGroupMap[volumeGroupName] 684 if ok { 685 err = fmt.Errorf("Duplicate VolumeGroup (%v) not allowed", volumeGroupName) 686 return 687 } 688 689 volumeGroup = &volumeGroupStruct{ 690 peer: peer, 691 name: volumeGroupName, 692 volumeMap: make(map[string]*volumeStruct), 693 } 694 695 if "" == virtualIPAddr { 696 volumeGroup.virtualIPAddr = peer.publicIPAddr 697 } else { 698 699 // virtualIPAddr must be a valid IP address or valid 700 // IP address in CIDR notation 701 volumeGroup.virtualIPAddr = net.ParseIP(virtualIPAddr) 702 if nil == volumeGroup.virtualIPAddr { 703 704 volumeGroup.virtualIPAddr, _, err = net.ParseCIDR(virtualIPAddr) 705 if err != nil { 706 err = fmt.Errorf("Cannot parse [VolumeGroup:%v]VirtualIPAddr: '%s' "+ 707 " as IP address or CIDR IP address: %v", 708 volumeGroupName, virtualIPAddr, err) 709 return 710 } 711 } 712 } 713 714 peer.volumeGroupMap[volumeGroupName] = volumeGroup 715 } 716 717 volumeList, err = confMap.FetchOptionValueStringSlice("VolumeGroup:"+volumeGroupName, "VolumeList") 718 if nil != err { 719 return 720 } 721 722 if 0 == len(volumeList) { 723 globals.emptyVolumeGroupToCheckSet[volumeGroupName] = peerName 724 } else { 725 for _, volumeName = range volumeList { 726 _, ok = volumeGroup.volumeMap[volumeName] 727 if ok { 728 err = fmt.Errorf("[VolumeGroup:%v]VolumeList contains Volume %v more than once", volumeGroupName, volumeName) 729 return 730 } 731 732 volume = &volumeStruct{ 733 volumeGroup: volumeGroup, 734 name: volumeName, 735 } 736 737 volume.fuseMountPointName, err = confMap.FetchOptionValueString("Volume:"+volumeName, "FUSEMountPointName") 738 if nil != err { 739 return 740 } 741 742 volume.nfsExported, err = confMap.FetchOptionValueBool("Volume:"+volumeName, "NFSExported") 743 if nil != err { 744 // Default to no NFS Export 745 volume.nfsExported = false 746 } 747 748 volume.smbShareName, err = confMap.FetchOptionValueString("Volume:"+volumeName, "SMBShareName") 749 if nil != err { 750 // Default to no SMB Share 751 volume.smbShareName = "" 752 } 753 754 volume.accountName, err = confMap.FetchOptionValueString("Volume:"+volumeName, "AccountName") 755 if nil != err { 756 return 757 } 758 759 volumeGroup.volumeMap[volumeName] = volume 760 761 globals.volumeToCheckList = append(globals.volumeToCheckList, volume) 762 } 763 } 764 default: 765 err = fmt.Errorf("[VolumeGroup:%s]PrimaryPeer must be empty or single-valued", volumeGroupName) 766 return 767 } 768 } 769 770 // Fetch JSON RPC Port to be used when polling Peers 771 772 globals.jsonRPCServerPort, err = confMap.FetchOptionValueUint16("JSONRPCServer", "TCPPort") 773 if nil != err { 774 return 775 } 776 777 // Fetch Swift recon settings 778 779 err = confMap.VerifyOptionIsMissing("SwiftClient", "SwiftReconChecksPerConfCheck") 780 if nil == err { 781 logger.WarnfWithError(err, "Unable to fetch [SwiftClient]SwiftReconChecksPerConfCheck... defaulting to %d", DefaultSwiftReconChecksPerConfCheck) 782 globals.swiftReconChecksPerConfCheck = DefaultSwiftReconChecksPerConfCheck 783 } else { 784 globals.swiftReconChecksPerConfCheck, err = confMap.FetchOptionValueUint64("SwiftClient", "SwiftReconChecksPerConfCheck") 785 if nil != err { 786 logger.ErrorfWithError(err, "Unable to parse [SwiftClient]SwiftReconChecksPerConfCheck") 787 return 788 } 789 } 790 791 if 0 == globals.swiftReconChecksPerConfCheck { 792 logger.Warnf("[SwiftClient]SwiftReconChecksPerConfCheck == 0... disabling recon checks") 793 } else { 794 globals.swiftReconNoWriteThreshold, err = confMap.FetchOptionValueUint8("SwiftClient", "SwiftReconNoWriteThreshold") 795 if nil == err { 796 if 100 < globals.swiftReconNoWriteThreshold { 797 err = fmt.Errorf("[SwiftClient]SwiftReconNoWriteThreshold cannot be greater than 100") 798 return 799 } 800 } else { 801 logger.WarnfWithError(err, "Unable to fetch [SwiftClient]SwiftReconNoWriteThreshold... defaulting to %d", DefaultSwiftReconNoWriteThreshold) 802 globals.swiftReconNoWriteThreshold = DefaultSwiftReconNoWriteThreshold 803 } 804 805 globals.swiftReconReadOnlyThreshold, err = confMap.FetchOptionValueUint8("SwiftClient", "SwiftReconReadOnlyThreshold") 806 if nil == err { 807 if 100 < globals.swiftReconReadOnlyThreshold { 808 err = fmt.Errorf("[SwiftClient]SwiftReconReadOnlyThreshold cannot be greater than 100") 809 return 810 } 811 if globals.swiftReconReadOnlyThreshold < globals.swiftReconNoWriteThreshold { 812 err = fmt.Errorf("[SwiftClient]SwiftReconReadOnlyThreshold cannot be less than [SwiftClient]SwiftReconNoWriteThreshold") 813 return 814 } 815 } else { 816 if globals.swiftReconNoWriteThreshold > DefaultSwiftReconReadOnlyThreshold { 817 logger.WarnfWithError(err, "Unable to fetch [SwiftClient]SwiftReconReadOnlyThreadhold... defaulting to %d", globals.swiftReconNoWriteThreshold) 818 globals.swiftReconReadOnlyThreshold = globals.swiftReconNoWriteThreshold 819 } else { 820 logger.WarnfWithError(err, "Unable to fetch [SwiftClient]SwiftReconReadOnlyThreadhold... defaulting to %d", DefaultSwiftReconReadOnlyThreshold) 821 globals.swiftReconReadOnlyThreshold = DefaultSwiftReconReadOnlyThreshold 822 } 823 } 824 825 globals.swiftConfDir, err = confMap.FetchOptionValueString("SwiftClient", "SwiftConfDir") 826 if nil != err { 827 logger.WarnfWithError(err, "Unable to fetch [SwiftClient]SwiftConfDir... defaulting to %s", DefaultSwiftConfDir) 828 globals.swiftConfDir = DefaultSwiftConfDir 829 } 830 } 831 832 // the liveness checker will be enabled (no more error out cases) 833 834 globals.enabled = true 835 836 // Initialize remaining globals 837 838 globals.swiftReconChecksUntilConfCheck = 0 // First ReconCheck will trigger a ConfCheck 839 globals.swiftConfFileMap = make(map[string]time.Time) 840 841 globals.recvMsgQueue = list.New() 842 843 globals.recvMsgChan = make(chan struct{}, 1) 844 845 globals.recvMsgsDoneChan = make(chan struct{}, 1) 846 go recvMsgs() 847 848 globals.currentLeader = nil 849 globals.currentVote = nil 850 globals.currentTerm = 0 851 852 globals.nextState = doFollower 853 854 globals.stateMachineStopChan = make(chan struct{}, 1) 855 856 // Initialize internal Liveness Report data as being empty 857 858 globals.myObservingPeerReport = nil 859 globals.livenessReport = nil 860 861 // Start up livenessChecker() 862 863 globals.livenessCheckerWG.Add(1) 864 go livenessChecker() 865 866 // Become an active participant in the cluster 867 868 globals.stateMachineDone.Add(1) 869 go stateMachine() 870 871 // Enable API behavior as we leave the SIGHUP-handling state 872 873 globals.active = true 874 875 err = nil 876 return 877 } 878 879 func (dummy *globalsStruct) Down(confMap conf.ConfMap) (err error) { 880 globals.requestExpirerStopChan <- struct{}{} 881 globals.requestExpirerDone.Wait() 882 883 return nil 884 }