github.com/swiftstack/proxyfs@v0.0.0-20201223034610-5434d919416e/liveness/polling.go

package liveness

import (
	"bytes"
	"compress/gzip"
	"container/list"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"math/big"
	"net"
	"net/http"
	"os"
	"reflect"
	"regexp"
	"time"

	"github.com/swiftstack/ProxyFS/jrpcfs"
	"github.com/swiftstack/ProxyFS/logger"
	"github.com/swiftstack/ProxyFS/utils"
)

type pingReqStruct struct {
	JSONrpc string            `json:"jsonrpc"`
	Method  string            `json:"method"`
	Params  [1]jrpcfs.PingReq `json:"params"`
	ID      uint64            `json:"id"`
}

type pingReplyStruct struct {
	ID     uint64           `json:"id"`
	Result jrpcfs.PingReply `json:"result"`
	Error  string           `json:"error"`
}

type ringFilePayloadJSONDevStruct struct {
	IP   string `json:"ip"`
	Port uint16 `json:"port"`
}

type ringFilePayloadJSONStruct struct {
	Devs []*ringFilePayloadJSONDevStruct `json:"devs"`
}

type reconDevReportStruct struct {
	Size int64 `json:"size"`
	Used int64 `json:"used"`
}

const maxRPCReplySize = 4096

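// livenessChecker runs as its own goroutine. Whenever globals.myObservingPeerReport
// is non-nil, it gathers every ServingPeer, VolumeGroup, Volume, and ReconEndpoint
// assigned to this peer into a randomly ordered list and probes them one at a
// time, spacing the probes evenly across globals.heartbeatDuration. A value
// received on globals.livenessCheckerControlChan either restarts the scan (true)
// or shuts the goroutine down (false).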
func livenessChecker() {
	var (
		checkEntityList                 *list.List
		durationPerCheck                time.Duration
		entitiesToCheck                 uint64
		entityToCheck                   *list.Element
		err                             error
		livenessCheckerControlChanValue bool
		reconEndpoint                   *internalReconEndpointReportStruct
		servingPeer                     *internalServingPeerReportStruct
		volume                          *internalVolumeReportStruct
		volumeGroup                     *internalVolumeGroupReportStruct
	)

	for {
		globals.Lock()

		if nil == globals.myObservingPeerReport {
			// Just wait to be signaled to check again... or exit
			globals.Unlock()
			livenessCheckerControlChanValue = <-globals.livenessCheckerControlChan
			if livenessCheckerControlChanValue {
				// Just loop back and re-check globals.myObservingPeerReport
			} else {
				// Exit
				globals.livenessCheckerWG.Done()
				return
			}
		} else { // nil != globals.myObservingPeerReport
			// Check to see if we are supposed to exit

			select {
			case livenessCheckerControlChanValue = <-globals.livenessCheckerControlChan:
				if livenessCheckerControlChanValue {
					// Just fall into checkEntityList processing
				} else {
					// Exit
					globals.Unlock()
					globals.livenessCheckerWG.Done()
					return
				}
			default:
				// Just fall into checkEntityList processing
			}

			// Compute randomly-ordered list of entities to check

			checkEntityList = list.New()

			for _, servingPeer = range globals.myObservingPeerReport.servingPeer {
				_ = checkEntityList.PushBack(servingPeer)

				for _, volumeGroup = range servingPeer.volumeGroup {
					_ = checkEntityList.PushBack(volumeGroup)

					for _, volume = range volumeGroup.volume {
						_ = checkEntityList.PushBack(volume)
					}
				}
			}

			for _, reconEndpoint = range globals.myObservingPeerReport.reconEndpoint {
				_ = checkEntityList.PushBack(reconEndpoint)
			}

			globals.Unlock()

			utils.RandomizeList(checkEntityList)

			// Compute number of entities to check & time between each check
			// Allow for one extra time slice to hopefully get all entities checked

			entitiesToCheck = uint64(checkEntityList.Len())

			durationPerCheck = globals.heartbeatDuration / time.Duration(entitiesToCheck+1)
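
			// For example (illustrative): with 9 entities to check and a
			// heartbeatDuration of 1s, durationPerCheck is 1s/10 = 100ms,
			// leaving one spare time slice so the full scan fits within a
			// single heartbeat interval.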

			// Perform each check spaced out by durationPerCheck

			entityToCheck = checkEntityList.Front()

			for nil != entityToCheck {
				switch reflect.TypeOf(entityToCheck.Value) {
				case reflect.TypeOf(servingPeer):
					livenessCheckServingPeer(entityToCheck.Value.(*internalServingPeerReportStruct))
				case reflect.TypeOf(volumeGroup):
					livenessCheckVolumeGroup(entityToCheck.Value.(*internalVolumeGroupReportStruct))
				case reflect.TypeOf(volume):
					livenessCheckVolume(entityToCheck.Value.(*internalVolumeReportStruct))
				case reflect.TypeOf(reconEndpoint):
					livenessCheckReconEndpoint(entityToCheck.Value.(*internalReconEndpointReportStruct))
				default:
					err = fmt.Errorf("Unrecognized reflect.TypeOf(entityToCheck.Value): %v", reflect.TypeOf(entityToCheck.Value))
					panic(err)
				}

				// Delay before next entityToCheck

				select {
				case livenessCheckerControlChanValue = <-globals.livenessCheckerControlChan:
					if livenessCheckerControlChanValue {
						// Just finish current checks before re-checking globals.myObservingPeerReport
					} else {
						// Exit
						globals.livenessCheckerWG.Done()
						return
					}
				case <-time.After(durationPerCheck):
					// Proceed to next check
				}

				// Loop back for next entityToCheck

				entityToCheck = entityToCheck.Next()
			}
		}
	}
}

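// livenessCheckServingPeer probes a ServingPeer by sending a Server.RpcPing
// JSON-RPC 2.0 request directly over a TCP connection to the peer's
// JSONRPCServer.TCPPort and reading back a single reply of at most
// maxRPCReplySize bytes. Illustratively, the exchange has this shape (exact
// params/result field names come from jrpcfs.PingReq and jrpcfs.PingReply):
//
//	-> {"jsonrpc":"2.0","method":"Server.RpcPing","params":[{...}],"id":0}
//	<- {"id":0,"result":{...},"error":""}
//
// Any failure once the connection is attempted leaves the peer marked
// StateDead; failures before that point leave it StateUnknown.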
func livenessCheckServingPeer(servingPeer *internalServingPeerReportStruct) {
	var (
		err              error
		nextID           uint64
		pingReply        pingReplyStruct
		pingReplyBuf     []byte
		pingReplyLen     int
		pingReq          pingReqStruct
		pingReqBuf       []byte
		servingPeerState string
		tcpAddr          *net.TCPAddr
		tcpAddrToResolve string
		tcpConn          *net.TCPConn
		timeNow          time.Time
	)

	// Setup exit path to atomically update servingPeer (initially servingPeer.state == StateUnknown)

	timeNow = time.Now()
	servingPeerState = StateUnknown

	defer func() {
		globals.Lock()
		servingPeer.state = servingPeerState
		servingPeer.lastCheckTime = timeNow
		globals.Unlock()
	}()

	// Form RpcPing message to poll servingPeer's JSONRPCServer.TCPPort with

	pingReq.JSONrpc = "2.0"
	pingReq.Method = "Server.RpcPing"
	pingReq.Params[0].Message = "Ping at " + timeNow.Format(time.RFC3339)
	pingReq.ID = nextID // nextID is never advanced, so the ID is always zero; the reply's ID is not validated against it

	pingReqBuf, err = json.Marshal(pingReq)
	if nil != err {
		err = fmt.Errorf("json.Marshal(pingReq) failed: %v", err)
		logger.Error(err)
		return
	}

	// Compute tcpAddr of servingPeer's JSONRPCServer.TCPPort

	if servingPeer.name == globals.whoAmI {
		tcpAddrToResolve = net.JoinHostPort(globals.myPrivateIPAddr.String(), fmt.Sprintf("%d", globals.jsonRPCServerPort))
	} else {
		tcpAddrToResolve = net.JoinHostPort(globals.peersByName[servingPeer.name].privateIPAddr.String(), fmt.Sprintf("%d", globals.jsonRPCServerPort))
	}

	tcpAddr, err = net.ResolveTCPAddr("tcp", tcpAddrToResolve)

	if nil != err {
		err = fmt.Errorf("net.ResolveTCPAddr(\"tcp\", \"%v\") failed: %v", tcpAddrToResolve, err)
		logger.Error(err)
		return
	}

	// Perform RpcPing... from here on, default servingPeer.state == StateDead

	servingPeerState = StateDead

	tcpConn, err = net.DialTCP("tcp", nil, tcpAddr)
	if nil != err {
		return
	}

	err = tcpConn.SetDeadline(time.Now().Add(globals.maxRequestDuration))
	if nil != err {
		return
	}

	_, err = tcpConn.Write(pingReqBuf)
	if nil != err {
		return
	}

	pingReplyBuf = make([]byte, maxRPCReplySize)

	// A single Read() is assumed to return the entire (small) JSON reply

	pingReplyLen, err = tcpConn.Read(pingReplyBuf)
	if nil != err {
		return
	}

	err = tcpConn.Close()
	if nil != err {
		return
	}

	pingReplyBuf = pingReplyBuf[:pingReplyLen]

	err = json.Unmarshal(pingReplyBuf, &pingReply)
	if nil != err {
		return
	}

	// RpcPing worked... so ensure servingPeer.state == StateAlive

	servingPeerState = StateAlive
}

func livenessCheckVolumeGroup(volumeGroup *internalVolumeGroupReportStruct) {
	var (
		volumeGroupState string
		timeNow          time.Time
	)

	// Setup exit path to atomically update volumeGroup (initially volumeGroup.state == StateUnknown)

	timeNow = time.Now()
	volumeGroupState = StateUnknown

	defer func() {
		globals.Lock()
		volumeGroup.state = volumeGroupState
		volumeGroup.lastCheckTime = timeNow
		globals.Unlock()
	}()

	// TODO: Implement livenessCheckVolumeGroup()
}

func livenessCheckVolume(volume *internalVolumeReportStruct) {
	var (
		volumeState string
		timeNow     time.Time
	)

	// Setup exit path to atomically update volume (initially volume.state == StateUnknown)

	timeNow = time.Now()
	volumeState = StateUnknown

	defer func() {
		globals.Lock()
		volume.state = volumeState
		volume.lastCheckTime = timeNow
		globals.Unlock()
	}()

	// TODO: Implement livenessCheckVolume()
}

func livenessCheckReconEndpoint(reconEndpoint *internalReconEndpointReportStruct) {
	var (
		bigDividend         *big.Int
		bigDivisor          *big.Int
		bigQuotient         *big.Int
		bigRemainder        *big.Int
		devUtilization      uint8
		err                 error
		quotient            int64
		reconDevReport      *reconDevReportStruct
		reconDevReportSlice []*reconDevReportStruct
		reconResp           *http.Response
		reconRespBody       []byte
		remainder           int64
		url                 string
	)

	reconEndpoint.maxDiskUsagePercentage = 0

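	// Each device's utilization is computed as ceil(100 * Used / Size), using
	// big.Int so the multiplication cannot overflow an int64. For example,
	// Used == 3 and Size == 7 yields 300/7 == 42 remainder 6, which rounds up
	// to a devUtilization of 43(%).
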
	url = fmt.Sprintf("http://%s/recon/diskusage", reconEndpoint.ipAddrPort)

	reconResp, err = http.Get(url)
	if nil == err {
		reconRespBody, err = ioutil.ReadAll(reconResp.Body)
		if nil == err {
			if http.StatusOK == reconResp.StatusCode {
				reconDevReportSlice = make([]*reconDevReportStruct, 0)
				err = json.Unmarshal(reconRespBody, &reconDevReportSlice)
				if nil == err {
					for _, reconDevReport = range reconDevReportSlice {
						if (reconDevReport.Used > 0) && (reconDevReport.Size > 0) && (reconDevReport.Used <= reconDevReport.Size) {
							bigDividend = new(big.Int).Mul(big.NewInt(100), big.NewInt(reconDevReport.Used))
							bigDivisor = big.NewInt(reconDevReport.Size)
							bigQuotient = new(big.Int).Quo(bigDividend, bigDivisor)
							bigRemainder = new(big.Int).Rem(bigDividend, bigDivisor)
							quotient = bigQuotient.Int64()
							remainder = bigRemainder.Int64()
							if 0 == remainder {
								devUtilization = uint8(quotient)
							} else {
								devUtilization = uint8(quotient) + 1
							}
							if devUtilization > reconEndpoint.maxDiskUsagePercentage {
								reconEndpoint.maxDiskUsagePercentage = devUtilization
							}
						} else {
							logger.Warnf("livenessCheckReconEndpoint() GET to %s got responseBody with unreasonable used and size values", url)
						}
					}
				} else {
					logger.WarnfWithError(err, "livenessCheckReconEndpoint() GET to %s got response.Body with invalid JSON", url)
				}
			} else {
				// err is nil on this path, so plain Warnf (rather than WarnfWithError) is appropriate
				logger.Warnf("livenessCheckReconEndpoint() GET to %s got bad status: %s", url, reconResp.Status)
			}
		} else {
			logger.WarnfWithError(err, "livenessCheckReconEndpoint() GET to %s response.Body read failed", url)
		}
		err = reconResp.Body.Close()
		if nil != err {
			logger.WarnfWithError(err, "livenessCheckReconEndpoint() GET to %s response.Body.Close() failed", url)
		}
	} else {
		logger.WarnfWithError(err, "livenessCheckReconEndpoint() failed to issue GET to %s", url)
	}
}

// computeLivenessCheckAssignments takes a list of ObservingPeers and produces a
// template internalLivenessReport that is to be filled in by this collection of peers.
// While the elements of the resultant internalLivenessReport have State, LastCheckTime,
// and MaxDiskUsagePercentage fields, these are ignored as they will ultimately be filled
// in by each ObservingPeer. The livenessCheckRedundancy is used to ensure that each
// ServingPeer, VolumeGroup, Volume, and ReconEndpoint is adequately covered. As every
// Volume is part of a VolumeGroup and every VolumeGroup is assigned to a single ServingPeer,
// this amounts to just doling out the Volumes to ObservingPeers with the required
// livenessCheckRedundancy. Similarly, the ReconEndpoints are doled out with this
// same livenessCheckRedundancy.
//
// It is a bit misleading for an ObservingPeer to report that a VolumeGroup is "alive"
// when not all of that VolumeGroup's Volumes have been checked. Similarly, it is a
// bit misleading for an ObservingPeer to report that a ServingPeer is "alive" when
// not all of that ServingPeer's VolumeGroups have been checked. Therefore, to get an
// accurate picture of the state of a VolumeGroup or ServingPeer, all results from
// all ObservingPeers should be consulted as a set when making any availability
// decision. As there is no way to check an empty VolumeGroup, its state will not
// appear in the resultant internalLivenessReport. However, ServingPeers that have no
// VolumeGroups assigned will still appear in the resultant internalLivenessReport.
func computeLivenessCheckAssignments(observingPeerNameList []string) (internalLivenessReport *internalLivenessReportStruct) {
	var (
		alreadyInSwiftReconEndpointIPAddrSet  bool
		curSwiftConfFileMap                   map[string]time.Time
		effectiveLivenessCheckRedundancy      uint64
		effectiveLivenessCheckRedundancyIndex uint64
		err                                   error
		fileInfo                              os.FileInfo
		fileInfoSlice                         []os.FileInfo
		fileInfoModTime                       time.Time
		fileInfoName                          string
		inSwiftConfFileMap                    bool
		internalObservingPeerReport           *internalObservingPeerReportStruct
		internalReconEndpointReport           *internalReconEndpointReportStruct
		internalServingPeerReport             *internalServingPeerReportStruct
		internalVolumeGroupReport             *internalVolumeGroupReportStruct
		internalVolumeReport                  *internalVolumeReportStruct
		matchedRingFilename                   bool
		needToUpdateSwiftConfFileMap          bool
		notYetAdded                           bool
		observingPeerIndex                    uint64
		observingPeerName                     string
		ok                                    bool
		prevFileInfoModTime                   time.Time
		ringFileData                          []byte
		ringFileName                          string
		ringFileMagic                         []byte
		ringFilePayload                       []byte
		ringFilePayloadJSON                   *ringFilePayloadJSONStruct
		ringFilePayloadJSONDev                *ringFilePayloadJSONDevStruct
		ringFilePayloadLen                    int32
		ringFileReader                        *gzip.Reader
		ringFileReadLen                       int
		ringFileVersion                       uint16
		servingPeer                           *peerStruct
		servingPeerName                       string
		swiftReconEndpoint                    string
		swiftReconEndpointIPAddrSet           map[string]struct{}
		volumeGroup                           *volumeGroupStruct
		volumeGroupName                       string
		volumeName                            string
		volumeToCheck                         *volumeStruct
	)

	if 0 == len(observingPeerNameList) {
		err = fmt.Errorf("computeLivenessCheckAssignments(): len(observingPeerNameList) cannot be zero")
		panic(err)
	}

	// Determine reconEndpoints

	if 0 == globals.swiftReconChecksPerConfCheck {
		globals.swiftReconEndpointSet = make(map[string]struct{})
	} else {
		if 0 == globals.swiftReconChecksUntilConfCheck {
			// Time to potentially refresh globals.swiftConfFileMap & globals.swiftReconEndpointSet

			globals.swiftReconChecksUntilConfCheck = globals.swiftReconChecksPerConfCheck

			fileInfoSlice, err = ioutil.ReadDir(globals.swiftConfDir)
			if nil != err {
				logger.FatalfWithError(err, "Unable to read [SwiftClient]SwiftConfDir (%s)", globals.swiftConfDir)
			}

			curSwiftConfFileMap = make(map[string]time.Time)

			for _, fileInfo = range fileInfoSlice {
				fileInfoName = fileInfo.Name()
				switch fileInfoName {
				case "account.ring.gz":
					matchedRingFilename = true
				case "container.ring.gz":
					matchedRingFilename = true
				default:
					matchedRingFilename, err = regexp.MatchString("^object.*\\.ring\\.gz$", fileInfoName)
					if nil != err {
						logger.FatalfWithError(err, "Unexpected failure calling regexp.MatchString()")
					}
				}

				if matchedRingFilename {
					curSwiftConfFileMap[fileInfoName] = fileInfo.ModTime()
				}
			}

			if len(globals.swiftConfFileMap) != len(curSwiftConfFileMap) {
				needToUpdateSwiftConfFileMap = true
			} else {
				needToUpdateSwiftConfFileMap = false
				for fileInfoName, fileInfoModTime = range curSwiftConfFileMap {
					prevFileInfoModTime, inSwiftConfFileMap = globals.swiftConfFileMap[fileInfoName]
					if !inSwiftConfFileMap || (fileInfoModTime != prevFileInfoModTime) {
						needToUpdateSwiftConfFileMap = true
					}
				}
			}

			if needToUpdateSwiftConfFileMap {
				// We must refresh globals.swiftConfFileMap & globals.swiftReconEndpointSet

				globals.swiftConfFileMap = curSwiftConfFileMap

				swiftReconEndpointIPAddrSet = make(map[string]struct{})
				globals.swiftReconEndpointSet = make(map[string]struct{})

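				// Each ring file is a gzip stream whose decompressed contents
				// begin with the 4-byte magic "R1NG", a big-endian uint16
				// version (expected to be 1), and a big-endian int32 payload
				// length, followed by that many bytes of JSON; the "devs"
				// array in that JSON supplies each device's ip and port.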
				for ringFileName = range globals.swiftConfFileMap {
					ringFileData, err = ioutil.ReadFile(globals.swiftConfDir + "/" + ringFileName)
					if nil == err {
						ringFileReader, err = gzip.NewReader(bytes.NewReader(ringFileData))
						if nil == err {
							ringFileMagic = make([]byte, 4)
							// io.ReadFull is used since a gzip.Reader may legitimately return fewer bytes than requested from a single Read()
							ringFileReadLen, err = io.ReadFull(ringFileReader, ringFileMagic)
							if nil == err {
								if ringFileReadLen == len(ringFileMagic) {
									if bytes.Equal([]byte("R1NG"), ringFileMagic) {
										err = binary.Read(ringFileReader, binary.BigEndian, &ringFileVersion)
										if nil == err {
											if 1 == ringFileVersion {
												err = binary.Read(ringFileReader, binary.BigEndian, &ringFilePayloadLen)
												if nil == err {
													ringFilePayload = make([]byte, ringFilePayloadLen)
													ringFileReadLen, err = io.ReadFull(ringFileReader, ringFilePayload)
													if nil == err {
														if ringFileReadLen == len(ringFilePayload) {
															ringFilePayloadJSON = &ringFilePayloadJSONStruct{}
															err = json.Unmarshal(ringFilePayload, ringFilePayloadJSON)
															if nil == err {
																for _, ringFilePayloadJSONDev = range ringFilePayloadJSON.Devs {
																	if nil != ringFilePayloadJSONDev {
																		_, alreadyInSwiftReconEndpointIPAddrSet = swiftReconEndpointIPAddrSet[ringFilePayloadJSONDev.IP]
																		if !alreadyInSwiftReconEndpointIPAddrSet {
																			swiftReconEndpointIPAddrSet[ringFilePayloadJSONDev.IP] = struct{}{}
																			swiftReconEndpoint = fmt.Sprintf("%s:%d", ringFilePayloadJSONDev.IP, ringFilePayloadJSONDev.Port)
																			globals.swiftReconEndpointSet[swiftReconEndpoint] = struct{}{}
																		}
																	}
																}
															} else {
																logger.WarnfWithError(err, "Unable to json.Unmarshal ringFilePayload from ring file %s", ringFileName)
															}
														} else {
															logger.Warnf("Misread of ringFilePayload from ring file %s", ringFileName)
														}
													} else {
														logger.WarnfWithError(err, "Unable to read ringFilePayload from ring file %s", ringFileName)
													}
												} else {
													logger.WarnfWithError(err, "Unable to read ringFilePayloadLen from ring file %s", ringFileName)
												}
											} else {
												logger.Warnf("Value of ringFileVersion unexpected from ring file %s", ringFileName)
											}
										} else {
											logger.WarnfWithError(err, "Unable to read ringFileVersion from ring file %s", ringFileName)
										}
									} else {
										logger.Warnf("Value of ringFileMagic unexpected from ring file %s", ringFileName)
									}
								} else {
									logger.Warnf("Misread of ringFileMagic from ring file %s", ringFileName)
								}
							} else {
								logger.WarnfWithError(err, "Unable to read ringFileMagic from ring file %s", ringFileName)
							}
							err = ringFileReader.Close()
							if nil != err {
								logger.WarnfWithError(err, "Unable to close gzip.Reader from ring file %s", ringFileName)
							}
						} else {
							logger.WarnfWithError(err, "Unable to create gzip.Reader from ring file %s", ringFileName)
						}
					} else {
						logger.WarnfWithError(err, "Unable to read ring file %s", ringFileName)
					}
				}
			}
		} else {
			globals.swiftReconChecksUntilConfCheck--
		}
	}

	// Prepare fresh internalLivenessReport

	internalLivenessReport = &internalLivenessReportStruct{
		observingPeer: make(map[string]*internalObservingPeerReportStruct),
	}

	// Adjust effectiveLivenessCheckRedundancy to be no more than len(observingPeerNameList)

	if uint64(len(observingPeerNameList)) < globals.livenessCheckRedundancy {
		effectiveLivenessCheckRedundancy = uint64(len(observingPeerNameList))
	} else {
		effectiveLivenessCheckRedundancy = globals.livenessCheckRedundancy
	}

	// Iterate through observingPeerNameList effectiveLivenessCheckRedundancy times scheduling Volumes

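	// Assignment proceeds round-robin (with wraparound) through
	// observingPeerNameList, skipping any ObservingPeer already watching the
	// entity at hand, until each entity has been handed out
	// effectiveLivenessCheckRedundancy times. For example (illustrative), with
	// observingPeerNameList == ["peerA", "peerB", "peerC"] and a redundancy of
	// 2, every Volume ends up watched by exactly two distinct peers.
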
	observingPeerIndex = 0

	for effectiveLivenessCheckRedundancyIndex = 0; effectiveLivenessCheckRedundancyIndex < effectiveLivenessCheckRedundancy; effectiveLivenessCheckRedundancyIndex++ {
		for _, volumeToCheck = range globals.volumeToCheckList {
			// Add volumeToCheck to currently indexed ObservingPeer

			volumeName = volumeToCheck.name
			volumeGroup = volumeToCheck.volumeGroup
			volumeGroupName = volumeGroup.name
			servingPeer = volumeGroup.peer
			if nil == servingPeer {
				servingPeerName = globals.whoAmI
			} else {
				servingPeerName = servingPeer.name
			}

			notYetAdded = true // Avoid duplicate assignments

			for notYetAdded {
				observingPeerName = observingPeerNameList[observingPeerIndex]

				internalObservingPeerReport, ok = internalLivenessReport.observingPeer[observingPeerName]
				if !ok {
					internalObservingPeerReport = &internalObservingPeerReportStruct{
						name:          observingPeerName,
						servingPeer:   make(map[string]*internalServingPeerReportStruct),
						reconEndpoint: make(map[string]*internalReconEndpointReportStruct),
					}
					internalLivenessReport.observingPeer[observingPeerName] = internalObservingPeerReport
				}

				internalServingPeerReport, ok = internalObservingPeerReport.servingPeer[servingPeerName]
				if !ok {
					internalServingPeerReport = &internalServingPeerReportStruct{
						observingPeer: internalObservingPeerReport,
						name:          servingPeerName,
						state:         StateUnknown,
						lastCheckTime: time.Time{},
						volumeGroup:   make(map[string]*internalVolumeGroupReportStruct),
					}
					internalObservingPeerReport.servingPeer[servingPeerName] = internalServingPeerReport
				}

				internalVolumeGroupReport, ok = internalServingPeerReport.volumeGroup[volumeGroupName]
				if !ok {
					internalVolumeGroupReport = &internalVolumeGroupReportStruct{
						servingPeer:   internalServingPeerReport,
						name:          volumeGroupName,
						state:         StateUnknown,
						lastCheckTime: time.Time{},
						volume:        make(map[string]*internalVolumeReportStruct),
					}
					internalServingPeerReport.volumeGroup[volumeGroupName] = internalVolumeGroupReport
				}

				_, ok = internalVolumeGroupReport.volume[volumeName]

				if ok {
					// Need to step to the next ObservingPeer because this one is already watching this Volume
				} else {
					// New Volume for this ObservingPeer... so add it

					internalVolumeReport = &internalVolumeReportStruct{
						volumeGroup:   internalVolumeGroupReport,
						name:          volumeName,
						state:         StateUnknown,
						lastCheckTime: time.Time{},
					}

					internalVolumeGroupReport.volume[volumeName] = internalVolumeReport

					notYetAdded = false
				}

				// Cycle to next ObservingPeer

				observingPeerIndex++
				if observingPeerIndex == uint64(len(observingPeerNameList)) {
					observingPeerIndex = 0
				}
			}
		}
	}

	// Iterate through observingPeerNameList effectiveLivenessCheckRedundancy times scheduling "empty" VolumeGroups

	for effectiveLivenessCheckRedundancyIndex = 0; effectiveLivenessCheckRedundancyIndex < effectiveLivenessCheckRedundancy; effectiveLivenessCheckRedundancyIndex++ {
		for volumeGroupName, servingPeerName = range globals.emptyVolumeGroupToCheckSet {
			// Add "empty" VolumeGroup to currently indexed ObservingPeer

			notYetAdded = true // Avoid duplicate assignments

			for notYetAdded {
				observingPeerName = observingPeerNameList[observingPeerIndex]

				internalObservingPeerReport, ok = internalLivenessReport.observingPeer[observingPeerName]
				if !ok {
					internalObservingPeerReport = &internalObservingPeerReportStruct{
						name:          observingPeerName,
						servingPeer:   make(map[string]*internalServingPeerReportStruct),
						reconEndpoint: make(map[string]*internalReconEndpointReportStruct),
					}
					internalLivenessReport.observingPeer[observingPeerName] = internalObservingPeerReport
				}

				internalServingPeerReport, ok = internalObservingPeerReport.servingPeer[servingPeerName]
				if !ok {
					internalServingPeerReport = &internalServingPeerReportStruct{
						observingPeer: internalObservingPeerReport,
						name:          servingPeerName,
						state:         StateUnknown,
						lastCheckTime: time.Time{},
						volumeGroup:   make(map[string]*internalVolumeGroupReportStruct),
					}
					internalObservingPeerReport.servingPeer[servingPeerName] = internalServingPeerReport
				}

				_, ok = internalServingPeerReport.volumeGroup[volumeGroupName]

				if ok {
					// Need to step to the next ObservingPeer because this one is already watching this VolumeGroup
				} else {
					// New VolumeGroup for this ObservingPeer->ServingPeer... so add it

					internalVolumeGroupReport = &internalVolumeGroupReportStruct{
						servingPeer:   internalServingPeerReport,
						name:          volumeGroupName,
						state:         StateUnknown,
						lastCheckTime: time.Time{},
						volume:        make(map[string]*internalVolumeReportStruct),
					}

					internalServingPeerReport.volumeGroup[volumeGroupName] = internalVolumeGroupReport

					notYetAdded = false
				}

				// Cycle to next ObservingPeer

				observingPeerIndex++
				if observingPeerIndex == uint64(len(observingPeerNameList)) {
					observingPeerIndex = 0
				}
			}
		}
	}

	// Iterate through observingPeerNameList effectiveLivenessCheckRedundancy times scheduling "empty" ServingPeers

	for effectiveLivenessCheckRedundancyIndex = 0; effectiveLivenessCheckRedundancyIndex < effectiveLivenessCheckRedundancy; effectiveLivenessCheckRedundancyIndex++ {
		for servingPeerName = range globals.emptyServingPeerToCheckSet {
			// Add "empty" ServingPeer to currently indexed ObservingPeer

			notYetAdded = true // Avoid duplicate assignments

			for notYetAdded {
				observingPeerName = observingPeerNameList[observingPeerIndex]

				internalObservingPeerReport, ok = internalLivenessReport.observingPeer[observingPeerName]
				if !ok {
					internalObservingPeerReport = &internalObservingPeerReportStruct{
						name:          observingPeerName,
						servingPeer:   make(map[string]*internalServingPeerReportStruct),
						reconEndpoint: make(map[string]*internalReconEndpointReportStruct),
					}
					internalLivenessReport.observingPeer[observingPeerName] = internalObservingPeerReport
				}

				_, ok = internalObservingPeerReport.servingPeer[servingPeerName]

				if ok {
					// Need to step to the next ObservingPeer because this one is already watching this ServingPeer
				} else {
					// New ServingPeer for this ObservingPeer... so add it

					internalServingPeerReport = &internalServingPeerReportStruct{
						observingPeer: internalObservingPeerReport,
						name:          servingPeerName,
						state:         StateUnknown,
						lastCheckTime: time.Time{},
						volumeGroup:   make(map[string]*internalVolumeGroupReportStruct),
					}

					internalObservingPeerReport.servingPeer[servingPeerName] = internalServingPeerReport

					notYetAdded = false
				}

				// Cycle to next ObservingPeer

				observingPeerIndex++
				if observingPeerIndex == uint64(len(observingPeerNameList)) {
					observingPeerIndex = 0
				}
			}
		}
	}

	// Iterate through observingPeerNameList effectiveLivenessCheckRedundancy times scheduling ReconEndpoints

	for effectiveLivenessCheckRedundancyIndex = 0; effectiveLivenessCheckRedundancyIndex < effectiveLivenessCheckRedundancy; effectiveLivenessCheckRedundancyIndex++ {
		for swiftReconEndpoint = range globals.swiftReconEndpointSet {
			// Add ReconEndpoint to currently indexed ObservingPeer

			notYetAdded = true // Avoid duplicate assignments

			for notYetAdded {
				observingPeerName = observingPeerNameList[observingPeerIndex]

				internalObservingPeerReport, ok = internalLivenessReport.observingPeer[observingPeerName]
				if !ok {
					internalObservingPeerReport = &internalObservingPeerReportStruct{
						name:          observingPeerName,
						servingPeer:   make(map[string]*internalServingPeerReportStruct),
						reconEndpoint: make(map[string]*internalReconEndpointReportStruct),
					}
					internalLivenessReport.observingPeer[observingPeerName] = internalObservingPeerReport
				}

				_, ok = internalObservingPeerReport.reconEndpoint[swiftReconEndpoint]

				if ok {
					// Need to step to the next ObservingPeer because this one is already watching this ReconEndpoint
				} else {
					// New ReconEndpoint for this ObservingPeer... so add it

					internalReconEndpointReport = &internalReconEndpointReportStruct{
						observingPeer:          internalObservingPeerReport,
						ipAddrPort:             swiftReconEndpoint,
						maxDiskUsagePercentage: 0,
					}

					internalObservingPeerReport.reconEndpoint[swiftReconEndpoint] = internalReconEndpointReport

					notYetAdded = false
				}

				// Cycle to next ObservingPeer

				observingPeerIndex++
				if observingPeerIndex == uint64(len(observingPeerNameList)) {
					observingPeerIndex = 0
				}
			}
		}
	}

	return
}

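// mergeObservingPeerReportIntoLivenessReport records an ObservingPeer's report
// in the supplied internalLivenessReport, displacing any report previously
// recorded for that same ObservingPeer.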
func mergeObservingPeerReportIntoLivenessReport(internalObservingPeerReport *internalObservingPeerReportStruct, internalLivenessReport *internalLivenessReportStruct) {
	// A plain map assignment suffices; it overwrites any prior entry for this ObservingPeer
	internalLivenessReport.observingPeer[internalObservingPeerReport.name] = internalObservingPeerReport
}

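// updateMyObservingPeerReportWhileLocked reconciles globals.myObservingPeerReport
// with a freshly computed internalObservingPeerReport: entities present only in
// the old report are removed, entities present only in the new report are added
// with StateUnknown, and entities present in both retain their existing state and
// lastCheckTime so that results from checks already performed are not discarded.
// As the name implies, the caller must hold globals.Lock().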
func updateMyObservingPeerReportWhileLocked(internalObservingPeerReport *internalObservingPeerReportStruct) {
	var (
		ok                         bool
		reconEndpointIPAddrPort    string
		reconEndpointIPAddrPortSet map[string]struct{}
		servingPeerName            string
		servingPeerNameSet         map[string]struct{}
		servingPeerNew             *internalServingPeerReportStruct
		servingPeerOld             *internalServingPeerReportStruct
		volumeGroupName            string
		volumeGroupNameSet         map[string]struct{}
		volumeGroupNew             *internalVolumeGroupReportStruct
		volumeGroupOld             *internalVolumeGroupReportStruct
		volumeName                 string
		volumeNameSet              map[string]struct{}
	)

	if (nil == globals.myObservingPeerReport) || (nil == internalObservingPeerReport) {
		globals.myObservingPeerReport = internalObservingPeerReport
		return
	}

	// Remove any ServingPeers from globals.myObservingPeerReport missing from internalObservingPeerReport

	servingPeerNameSet = make(map[string]struct{})

	for servingPeerName = range globals.myObservingPeerReport.servingPeer {
		_, ok = internalObservingPeerReport.servingPeer[servingPeerName]
		if !ok {
			servingPeerNameSet[servingPeerName] = struct{}{}
		}
	}

	for servingPeerName = range servingPeerNameSet {
		delete(globals.myObservingPeerReport.servingPeer, servingPeerName)
	}

	// Add any ServingPeers from internalObservingPeerReport missing from globals.myObservingPeerReport

	for servingPeerName = range internalObservingPeerReport.servingPeer {
		_, ok = globals.myObservingPeerReport.servingPeer[servingPeerName]
		if !ok {
			globals.myObservingPeerReport.servingPeer[servingPeerName] = &internalServingPeerReportStruct{
				observingPeer: globals.myObservingPeerReport,
				name:          servingPeerName,
				state:         StateUnknown,
				lastCheckTime: time.Time{},
				volumeGroup:   make(map[string]*internalVolumeGroupReportStruct),
			}
		}
	}

	// Now loop inside each ServingPeer (must now exist in both globals.myObservingPeerReport & internalObservingPeerReport)

	for servingPeerName, servingPeerOld = range globals.myObservingPeerReport.servingPeer {
		servingPeerNew = internalObservingPeerReport.servingPeer[servingPeerName]

		// Remove any VolumeGroups from servingPeerOld missing from servingPeerNew

		volumeGroupNameSet = make(map[string]struct{})

		for volumeGroupName = range servingPeerOld.volumeGroup {
			_, ok = servingPeerNew.volumeGroup[volumeGroupName]
			if !ok {
				volumeGroupNameSet[volumeGroupName] = struct{}{}
			}
		}

		for volumeGroupName = range volumeGroupNameSet {
			delete(servingPeerOld.volumeGroup, volumeGroupName)
		}

		// Add any VolumeGroups from servingPeerNew missing from servingPeerOld

		for volumeGroupName = range servingPeerNew.volumeGroup {
			_, ok = servingPeerOld.volumeGroup[volumeGroupName]
			if !ok {
				servingPeerOld.volumeGroup[volumeGroupName] = &internalVolumeGroupReportStruct{
					servingPeer:   servingPeerOld,
					name:          volumeGroupName,
					state:         StateUnknown,
					lastCheckTime: time.Time{},
					volume:        make(map[string]*internalVolumeReportStruct),
				}
			}
		}

		// Now loop inside each VolumeGroup (must now exist in both servingPeerOld & servingPeerNew)

		for volumeGroupName, volumeGroupOld = range servingPeerOld.volumeGroup {
			volumeGroupNew = servingPeerNew.volumeGroup[volumeGroupName]

			// Remove any Volumes from volumeGroupOld missing from volumeGroupNew

			volumeNameSet = make(map[string]struct{})

			for volumeName = range volumeGroupOld.volume {
				_, ok = volumeGroupNew.volume[volumeName]
				if !ok {
					volumeNameSet[volumeName] = struct{}{}
				}
			}

			for volumeName = range volumeNameSet {
				delete(volumeGroupOld.volume, volumeName)
			}

			// Add any Volumes from volumeGroupNew missing from volumeGroupOld

			for volumeName = range volumeGroupNew.volume {
				_, ok = volumeGroupOld.volume[volumeName]
				if !ok {
					volumeGroupOld.volume[volumeName] = &internalVolumeReportStruct{
						volumeGroup:   volumeGroupOld,
						name:          volumeName,
						state:         StateUnknown,
						lastCheckTime: time.Time{},
					}
				}
			}
		}
	}

	// Remove any ReconEndpoints from globals.myObservingPeerReport missing from internalObservingPeerReport

	reconEndpointIPAddrPortSet = make(map[string]struct{})

	for reconEndpointIPAddrPort = range globals.myObservingPeerReport.reconEndpoint {
		_, ok = internalObservingPeerReport.reconEndpoint[reconEndpointIPAddrPort]
		if !ok {
			reconEndpointIPAddrPortSet[reconEndpointIPAddrPort] = struct{}{}
		}
	}

	for reconEndpointIPAddrPort = range reconEndpointIPAddrPortSet {
		delete(globals.myObservingPeerReport.reconEndpoint, reconEndpointIPAddrPort)
	}

	// Add any ReconEndpoints from internalObservingPeerReport missing from globals.myObservingPeerReport

	for reconEndpointIPAddrPort = range internalObservingPeerReport.reconEndpoint {
		_, ok = globals.myObservingPeerReport.reconEndpoint[reconEndpointIPAddrPort]
		if !ok {
			globals.myObservingPeerReport.reconEndpoint[reconEndpointIPAddrPort] = &internalReconEndpointReportStruct{
				observingPeer:          globals.myObservingPeerReport,
				ipAddrPort:             reconEndpointIPAddrPort,
				maxDiskUsagePercentage: 0,
			}
		}
	}
}