github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/third_party/labix.org/v2/mgo/cluster.go

// mgo - MongoDB driver for Go
//
// Copyright (c) 2010-2012 - Gustavo Niemeyer <gustavo@niemeyer.net>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package mgo

import (
	"camlistore.org/third_party/labix.org/v2/mgo/bson"
	"errors"
	"net"
	"sync"
	"time"
)

// ---------------------------------------------------------------------------
// Mongo cluster encapsulation.
//
// A cluster enables communication with one or more servers participating
// in a mongo cluster. This works with individual servers, a replica set,
// a replica pair, one or multiple mongos routers, etc.

type mongoCluster struct {
	sync.RWMutex
	serverSynced sync.Cond
	userSeeds    []string
	dynaSeeds    []string
	servers      mongoServers
	masters      mongoServers
	references   int
	syncing      bool
	direct       bool
	cachedIndex  map[string]bool
	sync         chan bool
	dial         dialer
}

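// newCluster creates a cluster value for the given seed addresses and
// starts a goroutine to keep the observed topology up-to-date. The
// returned cluster holds one reference on behalf of the caller.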
func newCluster(userSeeds []string, direct bool, dial dialer) *mongoCluster {
	cluster := &mongoCluster{
		userSeeds:  userSeeds,
		references: 1,
		direct:     direct,
		dial:       dial,
	}
	cluster.serverSynced.L = cluster.RWMutex.RLocker()
	cluster.sync = make(chan bool, 1)
	stats.cluster(+1)
	go cluster.syncServersLoop()
	return cluster
}

// Acquire increases the reference count for the cluster.
func (cluster *mongoCluster) Acquire() {
	cluster.Lock()
	cluster.references++
	debugf("Cluster %p acquired (refs=%d)", cluster, cluster.references)
	cluster.Unlock()
}

// Release decreases the reference count for the cluster. Once
// it reaches zero, all servers will be closed.
func (cluster *mongoCluster) Release() {
	cluster.Lock()
	if cluster.references == 0 {
		panic("cluster.Release() with references == 0")
	}
	cluster.references--
	debugf("Cluster %p released (refs=%d)", cluster, cluster.references)
	if cluster.references == 0 {
		for _, server := range cluster.servers.Slice() {
			server.Close()
		}
		// Wake up the sync loop so it can die.
		cluster.syncServers()
		stats.cluster(-1)
	}
	cluster.Unlock()
}

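// LiveServers returns the addresses of all servers the cluster
// currently knows to be alive.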
func (cluster *mongoCluster) LiveServers() (servers []string) {
	cluster.RLock()
	for _, serv := range cluster.servers.Slice() {
		servers = append(servers, serv.Addr)
	}
	cluster.RUnlock()
	return servers
}

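// removeServer removes server from the sets of known servers and
// masters, and closes the removed entry's sockets.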
func (cluster *mongoCluster) removeServer(server *mongoServer) {
	cluster.Lock()
	cluster.masters.Remove(server)
	other := cluster.servers.Remove(server)
	cluster.Unlock()
	if other != nil {
		other.Close()
		log("Removed server ", server.Addr, " from cluster.")
	}
	server.Close()
}

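// isMasterResult holds the fields of interest from a server's reply
// to the MongoDB ismaster command.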
type isMasterResult struct {
	IsMaster  bool
	Secondary bool
	Primary   string
	Hosts     []string
	Passives  []string
	Tags      bson.D
	Msg       string
}

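// isMaster runs the ismaster command on the given socket and stores
// the server's reply into result.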
func (cluster *mongoCluster) isMaster(socket *mongoSocket, result *isMasterResult) error {
	// Monotonic mode lets it talk to a slave and still hold the socket.
	session := newSession(Monotonic, cluster, 10*time.Second)
	session.setSocket(socket)
	err := session.Run("ismaster", result)
	session.Close()
	return err
}

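// possibleTimeout is satisfied by errors that can report whether they
// were caused by a timeout, such as net.Error values.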
type possibleTimeout interface {
	Timeout() bool
}

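// syncSocketTimeout limits how long to wait for a socket while
// synchronizing the cluster topology.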
var syncSocketTimeout = 5 * time.Second

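// syncServer runs ismaster against server to establish its role in the
// cluster, retrying a few times before giving up on it, and returns
// the server's details along with the peer addresses it reported.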
func (cluster *mongoCluster) syncServer(server *mongoServer) (info *mongoServerInfo, hosts []string, err error) {
	addr := server.Addr
	log("SYNC Processing ", addr, "...")

	// Retry a few times to avoid knocking a server down for a hiccup.
	var result isMasterResult
	var tryerr error
	for retry := 0; ; retry++ {
		if retry == 3 {
			return nil, nil, tryerr
		}
		if retry > 0 {
			// Don't abuse the server needlessly if there's something actually wrong.
			if err, ok := tryerr.(possibleTimeout); ok && err.Timeout() {
				// Give waiters a chance to time out as well.
				cluster.serverSynced.Broadcast()
			}
			time.Sleep(500 * time.Millisecond)
		}

		// It's not clear what would be a good timeout here. Is it
		// better to wait longer or to retry?
		socket, _, err := server.AcquireSocket(0, syncSocketTimeout)
		if err != nil {
			tryerr = err
			logf("SYNC Failed to get socket to %s: %v", addr, err)
			continue
		}
		err = cluster.isMaster(socket, &result)
		socket.Release()
		if err != nil {
			tryerr = err
			logf("SYNC Command 'ismaster' to %s failed: %v", addr, err)
			continue
		}
		debugf("SYNC Result of 'ismaster' from %s: %#v", addr, result)
		break
	}

	if result.IsMaster {
		debugf("SYNC %s is a master.", addr)
		// Made an incorrect assumption above, so fix stats.
		stats.conn(-1, false)
		stats.conn(+1, true)
	} else if result.Secondary {
		debugf("SYNC %s is a slave.", addr)
	} else if cluster.direct {
		logf("SYNC %s is in an unknown state. Pretending it's a slave due to direct connection.", addr)
	} else {
		logf("SYNC %s is neither a master nor a slave.", addr)
		// Made an incorrect assumption above, so fix stats.
		stats.conn(-1, false)
		return nil, nil, errors.New(addr + " is neither a master nor a slave")
	}

	info = &mongoServerInfo{
		Master: result.IsMaster,
		Mongos: result.Msg == "isdbgrid",
		Tags:   result.Tags,
	}

	hosts = make([]string, 0, 1+len(result.Hosts)+len(result.Passives))
	if result.Primary != "" {
		// First in the list to speed up master discovery.
		hosts = append(hosts, result.Primary)
	}
	hosts = append(hosts, result.Hosts...)
	hosts = append(hosts, result.Passives...)

	debugf("SYNC %s knows about the following peers: %#v", addr, hosts)
	return info, hosts, nil
}

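// syncKind records whether a synchronization pass managed to obtain
// topology data from a primary (completeSync) or not (partialSync).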
type syncKind bool

const (
	completeSync syncKind = true
	partialSync  syncKind = false
)

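// addServer registers server within the cluster, updating the set of
// known masters when the server's role changed. Servers that are not
// yet known are discarded if they were found during a partial sync.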
func (cluster *mongoCluster) addServer(server *mongoServer, info *mongoServerInfo, syncKind syncKind) {
	cluster.Lock()
	current := cluster.servers.Search(server.ResolvedAddr)
	if current == nil {
		if syncKind == partialSync {
			cluster.Unlock()
			server.Close()
			log("SYNC Discarding unknown server ", server.Addr, " due to partial sync.")
			return
		}
		cluster.servers.Add(server)
		if info.Master {
			cluster.masters.Add(server)
			log("SYNC Adding ", server.Addr, " to cluster as a master.")
		} else {
			log("SYNC Adding ", server.Addr, " to cluster as a slave.")
		}
	} else {
		if server != current {
			panic("addServer attempting to add a duplicate server")
		}
		if server.Info().Master != info.Master {
			if info.Master {
				log("SYNC Server ", server.Addr, " is now a master.")
				cluster.masters.Add(server)
			} else {
				log("SYNC Server ", server.Addr, " is now a slave.")
				cluster.masters.Remove(server)
			}
		}
	}
	server.SetInfo(info)
	debugf("SYNC Broadcasting availability of server %s", server.Addr)
	cluster.serverSynced.Broadcast()
	cluster.Unlock()
}

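// getKnownAddrs returns the deduplicated union of the user-provided
// seeds, the dynamic seeds, and the addresses of all known servers.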
func (cluster *mongoCluster) getKnownAddrs() []string {
	cluster.RLock()
	max := len(cluster.userSeeds) + len(cluster.dynaSeeds) + cluster.servers.Len()
	seen := make(map[string]bool, max)
	known := make([]string, 0, max)

	add := func(addr string) {
		if _, found := seen[addr]; !found {
			seen[addr] = true
			known = append(known, addr)
		}
	}

	for _, addr := range cluster.userSeeds {
		add(addr)
	}
	for _, addr := range cluster.dynaSeeds {
		add(addr)
	}
	for _, serv := range cluster.servers.Slice() {
		add(serv.Addr)
	}
	cluster.RUnlock()

	return known
}

// syncServers injects a value into the cluster.sync channel to force
// an iteration of the syncServersLoop function.
func (cluster *mongoCluster) syncServers() {
	select {
	case cluster.sync <- true:
	default:
	}
}

// How long to wait for a checkup of the cluster topology if nothing
// else kicks a synchronization before that.
const syncServersDelay = 30 * time.Second

// syncServersLoop loops while the cluster is alive to keep its idea of
// the server topology up-to-date. It must be called just once from
// newCluster. The loop iterates once syncServersDelay has passed, or
// if somebody injects a value into the cluster.sync channel to force a
// synchronization. A loop iteration will contact all servers in
// parallel, ask them about known peers and their own role within the
// cluster, and then attempt to do the same with all the peers
// retrieved.
func (cluster *mongoCluster) syncServersLoop() {
	for {
		debugf("SYNC Cluster %p is starting a sync loop iteration.", cluster)

		cluster.Lock()
		if cluster.references == 0 {
			cluster.Unlock()
			break
		}
		cluster.references++ // Keep alive while syncing.
		direct := cluster.direct
		cluster.Unlock()

		cluster.syncServersIteration(direct)

		// We just synchronized, so consume any outstanding requests.
		select {
		case <-cluster.sync:
		default:
		}

		cluster.Release()

		// Hold off before allowing another sync. No point in
		// burning CPU looking for down servers.
		time.Sleep(500 * time.Millisecond)

		cluster.Lock()
		if cluster.references == 0 {
			cluster.Unlock()
			break
		}
		// Poke all waiters so they have a chance to time out or
		// restart syncing if they wish to.
		cluster.serverSynced.Broadcast()
		// Check if we have to restart immediately either way.
		restart := (!direct && cluster.masters.Empty()) || cluster.servers.Empty()
		cluster.Unlock()

		if restart {
			log("SYNC No masters found. Will synchronize again.")
			continue
		}

		debugf("SYNC Cluster %p waiting for next requested or scheduled sync.", cluster)

		// Hold off until somebody explicitly requests a synchronization
		// or it's time to check for a cluster topology change again.
		select {
		case <-cluster.sync:
		case <-time.After(syncServersDelay):
		}
	}
	debugf("SYNC Cluster %p is stopping its sync loop.", cluster)
}

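// server returns the known server for the given resolved address, or
// a newly created one if the address is not known yet.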
func (cluster *mongoCluster) server(addr string, tcpaddr *net.TCPAddr) *mongoServer {
	cluster.RLock()
	server := cluster.servers.Search(tcpaddr.String())
	cluster.RUnlock()
	if server != nil {
		return server
	}
	return newServer(addr, tcpaddr, cluster.sync, cluster.dial)
}

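// resolveAddr resolves addr into a TCP address, logging a failure to
// do so.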
func resolveAddr(addr string) (*net.TCPAddr, error) {
	tcpaddr, err := net.ResolveTCPAddr("tcp", addr)
	if err != nil {
		log("SYNC Failed to resolve ", addr, ": ", err.Error())
		return nil, err
	}
	if tcpaddr.String() != addr {
		debug("SYNC Address ", addr, " resolved as ", tcpaddr.String())
	}
	return tcpaddr, nil
}

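// pendingAdd holds a reachable server that cannot be added to the
// cluster until a master confirms it as a legitimate member.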
type pendingAdd struct {
	server *mongoServer
	info   *mongoServerInfo
}

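// syncServersIteration performs one topology synchronization pass: it
// contacts every known address in parallel, recursively follows the
// peers each server reports, and then adds or removes servers from the
// cluster depending on whether a primary could be reached.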
func (cluster *mongoCluster) syncServersIteration(direct bool) {
	log("SYNC Starting full topology synchronization...")

	var wg sync.WaitGroup
	var m sync.Mutex
	notYetAdded := make(map[string]pendingAdd)
	addIfFound := make(map[string]bool)
	seen := make(map[string]bool)
	syncKind := partialSync

	var spawnSync func(addr string, byMaster bool)
	spawnSync = func(addr string, byMaster bool) {
		wg.Add(1)
		go func() {
			defer wg.Done()

			tcpaddr, err := resolveAddr(addr)
			if err != nil {
				log("SYNC Failed to start sync of ", addr, ": ", err.Error())
				return
			}
			resolvedAddr := tcpaddr.String()

			m.Lock()
			if byMaster {
				if pending, ok := notYetAdded[resolvedAddr]; ok {
					delete(notYetAdded, resolvedAddr)
					m.Unlock()
					cluster.addServer(pending.server, pending.info, completeSync)
					return
				}
				addIfFound[resolvedAddr] = true
			}
			if seen[resolvedAddr] {
				m.Unlock()
				return
			}
			seen[resolvedAddr] = true
			m.Unlock()

			server := cluster.server(addr, tcpaddr)
			info, hosts, err := cluster.syncServer(server)
			if err != nil {
				cluster.removeServer(server)
				return
			}

			m.Lock()
			add := direct || info.Master || addIfFound[resolvedAddr]
			if add {
				syncKind = completeSync
			} else {
				notYetAdded[resolvedAddr] = pendingAdd{server, info}
			}
			m.Unlock()
			if add {
				cluster.addServer(server, info, completeSync)
			}
			if !direct {
				for _, addr := range hosts {
					spawnSync(addr, info.Master)
				}
			}
		}()
	}

	knownAddrs := cluster.getKnownAddrs()
	for _, addr := range knownAddrs {
		spawnSync(addr, false)
	}
	wg.Wait()

	if syncKind == completeSync {
		logf("SYNC Synchronization was complete (got data from primary).")
		for _, pending := range notYetAdded {
			cluster.removeServer(pending.server)
		}
	} else {
		logf("SYNC Synchronization was partial (cannot talk to primary).")
		for _, pending := range notYetAdded {
			cluster.addServer(pending.server, pending.info, partialSync)
		}
	}

	cluster.Lock()
	ml := cluster.masters.Len()
	logf("SYNC Synchronization completed: %d master(s) and %d slave(s) alive.", ml, cluster.servers.Len()-ml)

	// Update dynamic seeds, but only if we have any good servers. Otherwise,
	// leave them alone for better chances of a successful sync in the future.
	if syncKind == completeSync {
		dynaSeeds := make([]string, cluster.servers.Len())
		for i, server := range cluster.servers.Slice() {
			dynaSeeds[i] = server.Addr
		}
		cluster.dynaSeeds = dynaSeeds
		debugf("SYNC New dynamic seeds: %#v\n", dynaSeeds)
	}
	cluster.Unlock()
}

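// socketsPerServer limits the number of sockets AcquireSocket may hold
// open per server before backing off and retrying.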
var socketsPerServer = 4096

// AcquireSocket returns a socket to a server in the cluster. If slaveOk is
// true, it will attempt to return a socket to a slave server. If it is
// false, the socket will necessarily be to a master server.
func (cluster *mongoCluster) AcquireSocket(slaveOk bool, syncTimeout time.Duration, socketTimeout time.Duration, serverTags []bson.D) (s *mongoSocket, err error) {
	var started time.Time
	warnedLimit := false
	for {
		cluster.RLock()
		for {
			ml := cluster.masters.Len()
			sl := cluster.servers.Len()
			debugf("Cluster has %d known masters and %d known slaves.", ml, sl-ml)
			if ml > 0 || (slaveOk && sl > 0) {
				break
			}
			if started.IsZero() {
				started = time.Now() // Initialize after fast path above.
			} else if syncTimeout != 0 && started.Before(time.Now().Add(-syncTimeout)) {
				cluster.RUnlock()
				return nil, errors.New("no reachable servers")
			}
			log("Waiting for servers to synchronize...")
			cluster.syncServers()

			// Remember: this will release and reacquire the lock.
			cluster.serverSynced.Wait()
		}

		var server *mongoServer
		if slaveOk {
			server = cluster.servers.BestFit(serverTags)
		} else {
			server = cluster.masters.BestFit(nil)
		}
		cluster.RUnlock()

		if server == nil {
			// Must have failed the requested tags. Sleep to avoid spinning.
			time.Sleep(100 * time.Millisecond)
			continue
		}

		s, abended, err := server.AcquireSocket(socketsPerServer, socketTimeout)
		if err == errSocketLimit {
			if !warnedLimit {
				warnedLimit = true
				log("WARNING: Per-server connection limit reached.")
			}
			time.Sleep(100 * time.Millisecond)
			continue
		}
		if err != nil {
			cluster.removeServer(server)
			cluster.syncServers()
			continue
		}
		if abended && !slaveOk {
			var result isMasterResult
			err := cluster.isMaster(s, &result)
			if err != nil || !result.IsMaster {
				logf("Cannot confirm server %s as master (%v)", server.Addr, err)
				s.Release()
				cluster.syncServers()
				time.Sleep(100 * time.Millisecond)
				continue
			}
		}
		return s, nil
	}
	panic("unreached")
}

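// CacheIndex records in the cluster-wide cache whether the index
// identified by cacheKey exists.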
func (cluster *mongoCluster) CacheIndex(cacheKey string, exists bool) {
	cluster.Lock()
	if cluster.cachedIndex == nil {
		cluster.cachedIndex = make(map[string]bool)
	}
	if exists {
		cluster.cachedIndex[cacheKey] = true
	} else {
		delete(cluster.cachedIndex, cacheKey)
	}
	cluster.Unlock()
}

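// HasCachedIndex reports whether the index identified by cacheKey is
// cached as existing.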
func (cluster *mongoCluster) HasCachedIndex(cacheKey string) (result bool) {
	cluster.RLock()
	if cluster.cachedIndex != nil {
		result = cluster.cachedIndex[cacheKey]
	}
	cluster.RUnlock()
	return
}

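// ResetIndexCache clears the cluster-wide index cache.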
func (cluster *mongoCluster) ResetIndexCache() {
	cluster.Lock()
	cluster.cachedIndex = make(map[string]bool)
	cluster.Unlock()
}