github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/third_party/labix.org/v2/mgo/cluster.go

// mgo - MongoDB driver for Go
//
// Copyright (c) 2010-2012 - Gustavo Niemeyer <gustavo@niemeyer.net>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package mgo

import (
	"camlistore.org/third_party/labix.org/v2/mgo/bson"
	"errors"
	"net"
	"sync"
	"time"
)

// ---------------------------------------------------------------------------
// Mongo cluster encapsulation.
//
// A cluster enables communication with one or more servers participating
// in a mongo cluster. This works with individual servers, a replica set,
// a replica pair, one or multiple mongos routers, etc.

type mongoCluster struct {
	sync.RWMutex
	serverSynced sync.Cond
	userSeeds    []string
	dynaSeeds    []string
	servers      mongoServers
	masters      mongoServers
	references   int
	syncing      bool
	direct       bool
	cachedIndex  map[string]bool
	sync         chan bool
	dial         dialer
}

func newCluster(userSeeds []string, direct bool, dial dialer) *mongoCluster {
	cluster := &mongoCluster{
		userSeeds:  userSeeds,
		references: 1,
		direct:     direct,
		dial:       dial,
	}
	cluster.serverSynced.L = cluster.RWMutex.RLocker()
	cluster.sync = make(chan bool, 1)
	stats.cluster(+1)
	go cluster.syncServersLoop()
	return cluster
}

// Acquire increases the reference count for the cluster.
func (cluster *mongoCluster) Acquire() {
	cluster.Lock()
	cluster.references++
	debugf("Cluster %p acquired (refs=%d)", cluster, cluster.references)
	cluster.Unlock()
}

// Release decreases the reference count for the cluster. Once
// it reaches zero, all servers will be closed.
func (cluster *mongoCluster) Release() {
	cluster.Lock()
	if cluster.references == 0 {
		panic("cluster.Release() with references == 0")
	}
	cluster.references--
	debugf("Cluster %p released (refs=%d)", cluster, cluster.references)
	if cluster.references == 0 {
		for _, server := range cluster.servers.Slice() {
			server.Close()
		}
		// Wake up the sync loop so it can die.
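		// This is a non-blocking send on the buffered cluster.sync channel;
		// the loop wakes up, observes references == 0, and exits.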
		cluster.syncServers()
		stats.cluster(-1)
	}
	cluster.Unlock()
}

func (cluster *mongoCluster) LiveServers() (servers []string) {
	cluster.RLock()
	for _, serv := range cluster.servers.Slice() {
		servers = append(servers, serv.Addr)
	}
	cluster.RUnlock()
	return servers
}

func (cluster *mongoCluster) removeServer(server *mongoServer) {
	cluster.Lock()
	cluster.masters.Remove(server)
	other := cluster.servers.Remove(server)
	cluster.Unlock()
	if other != nil {
		other.Close()
		log("Removed server ", server.Addr, " from cluster.")
	}
	server.Close()
}

type isMasterResult struct {
	IsMaster  bool
	Secondary bool
	Primary   string
	Hosts     []string
	Passives  []string
	Tags      bson.D
	Msg       string
}

func (cluster *mongoCluster) isMaster(socket *mongoSocket, result *isMasterResult) error {
	// Monotonic mode lets the session talk to a slave and still hold the socket.
	session := newSession(Monotonic, cluster, 10*time.Second)
	session.setSocket(socket)
	err := session.Run("ismaster", result)
	session.Close()
	return err
}

type possibleTimeout interface {
	Timeout() bool
}

var syncSocketTimeout = 5 * time.Second

func (cluster *mongoCluster) syncServer(server *mongoServer) (info *mongoServerInfo, hosts []string, err error) {
	addr := server.Addr
	log("SYNC Processing ", addr, "...")

	// Retry a few times to avoid knocking a server down for a hiccup.
	var result isMasterResult
	var tryerr error
	for retry := 0; ; retry++ {
		if retry == 3 {
			return nil, nil, tryerr
		}
		if retry > 0 {
			// Don't abuse the server needlessly if there's something actually wrong.
			if err, ok := tryerr.(possibleTimeout); ok && err.Timeout() {
				// Give waiters a chance to time out as well.
				cluster.serverSynced.Broadcast()
			}
			time.Sleep(500 * time.Millisecond)
		}

		// It's not clear what would be a good timeout here. Is it
		// better to wait longer or to retry?
		socket, _, err := server.AcquireSocket(0, syncSocketTimeout)
		if err != nil {
			tryerr = err
			logf("SYNC Failed to get socket to %s: %v", addr, err)
			continue
		}
		err = cluster.isMaster(socket, &result)
		socket.Release()
		if err != nil {
			tryerr = err
			logf("SYNC Command 'ismaster' to %s failed: %v", addr, err)
			continue
		}
		debugf("SYNC Result of 'ismaster' from %s: %#v", addr, result)
		break
	}

	if result.IsMaster {
		debugf("SYNC %s is a master.", addr)
		// Made an incorrect assumption above, so fix stats.
		stats.conn(-1, false)
		stats.conn(+1, true)
	} else if result.Secondary {
		debugf("SYNC %s is a slave.", addr)
	} else if cluster.direct {
		logf("SYNC %s in unknown state. Pretending it's a slave due to direct connection.", addr)
	} else {
		logf("SYNC %s is neither a master nor a slave.", addr)
		// Made an incorrect assumption above, so fix stats.
		stats.conn(-1, false)
		return nil, nil, errors.New(addr + " is not a master nor slave")
	}

	info = &mongoServerInfo{
		Master: result.IsMaster,
		Mongos: result.Msg == "isdbgrid",
		Tags:   result.Tags,
	}

	hosts = make([]string, 0, 1+len(result.Hosts)+len(result.Passives))
	if result.Primary != "" {
		// First in the list to speed up master discovery.
		hosts = append(hosts, result.Primary)
	}
	hosts = append(hosts, result.Hosts...)
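	// Passives are replica members that will not be elected primary; they are
	// still valid peers to probe and can serve slaveOk reads.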
	hosts = append(hosts, result.Passives...)

	debugf("SYNC %s knows about the following peers: %#v", addr, hosts)
	return info, hosts, nil
}

type syncKind bool

const (
	completeSync syncKind = true
	partialSync  syncKind = false
)

func (cluster *mongoCluster) addServer(server *mongoServer, info *mongoServerInfo, syncKind syncKind) {
	cluster.Lock()
	current := cluster.servers.Search(server.ResolvedAddr)
	if current == nil {
		if syncKind == partialSync {
			cluster.Unlock()
			server.Close()
			log("SYNC Discarding unknown server ", server.Addr, " due to partial sync.")
			return
		}
		cluster.servers.Add(server)
		if info.Master {
			cluster.masters.Add(server)
			log("SYNC Adding ", server.Addr, " to cluster as a master.")
		} else {
			log("SYNC Adding ", server.Addr, " to cluster as a slave.")
		}
	} else {
		if server != current {
			panic("addServer attempting to add duplicated server")
		}
		if server.Info().Master != info.Master {
			if info.Master {
				log("SYNC Server ", server.Addr, " is now a master.")
				cluster.masters.Add(server)
			} else {
				log("SYNC Server ", server.Addr, " is now a slave.")
				cluster.masters.Remove(server)
			}
		}
	}
	server.SetInfo(info)
	debugf("SYNC Broadcasting availability of server %s", server.Addr)
	cluster.serverSynced.Broadcast()
	cluster.Unlock()
}

func (cluster *mongoCluster) getKnownAddrs() []string {
	cluster.RLock()
	max := len(cluster.userSeeds) + len(cluster.dynaSeeds) + cluster.servers.Len()
	seen := make(map[string]bool, max)
	known := make([]string, 0, max)

	add := func(addr string) {
		if _, found := seen[addr]; !found {
			seen[addr] = true
			known = append(known, addr)
		}
	}

	for _, addr := range cluster.userSeeds {
		add(addr)
	}
	for _, addr := range cluster.dynaSeeds {
		add(addr)
	}
	for _, serv := range cluster.servers.Slice() {
		add(serv.Addr)
	}
	cluster.RUnlock()

	return known
}

// syncServers injects a value into the cluster.sync channel to force
// an iteration of the syncServersLoop function.
func (cluster *mongoCluster) syncServers() {
	select {
	case cluster.sync <- true:
	default:
	}
}

// How long to wait for a checkup of the cluster topology if nothing
// else kicks a synchronization before that.
const syncServersDelay = 30 * time.Second

// syncServersLoop loops while the cluster is alive to keep its idea of
// the server topology up-to-date. It must be called just once from
// newCluster. The loop iterates once syncServersDelay has passed, or
// if somebody injects a value into the cluster.sync channel to force a
// synchronization. A loop iteration will contact all servers in
// parallel, ask them about known peers and their own role within the
// cluster, and then attempt to do the same with all the peers
// retrieved.
func (cluster *mongoCluster) syncServersLoop() {
	for {
		debugf("SYNC Cluster %p is starting a sync loop iteration.", cluster)

		cluster.Lock()
		if cluster.references == 0 {
			cluster.Unlock()
			break
		}
		cluster.references++ // Keep alive while syncing.
		direct := cluster.direct
		cluster.Unlock()

		cluster.syncServersIteration(direct)

		// We just synchronized, so consume any outstanding requests.
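		// cluster.sync has capacity one and syncServers never blocks sending
		// to it, so a single non-blocking receive drains any pending request.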
		select {
		case <-cluster.sync:
		default:
		}

		cluster.Release()

		// Hold off before allowing another sync. No point in
		// burning CPU looking for down servers.
		time.Sleep(500 * time.Millisecond)

		cluster.Lock()
		if cluster.references == 0 {
			cluster.Unlock()
			break
		}
		// Poke all waiters so they have a chance to time out or
		// restart syncing if they wish to.
		cluster.serverSynced.Broadcast()
		// Check if we have to restart immediately either way.
		restart := !direct && cluster.masters.Empty() || cluster.servers.Empty()
		cluster.Unlock()

		if restart {
			log("SYNC No masters found. Will synchronize again.")
			continue
		}

		debugf("SYNC Cluster %p waiting for next requested or scheduled sync.", cluster)

		// Hold off until somebody explicitly requests a synchronization
		// or it's time to check for a cluster topology change again.
		select {
		case <-cluster.sync:
		case <-time.After(syncServersDelay):
		}
	}
	debugf("SYNC Cluster %p is stopping its sync loop.", cluster)
}

func (cluster *mongoCluster) server(addr string, tcpaddr *net.TCPAddr) *mongoServer {
	cluster.RLock()
	server := cluster.servers.Search(tcpaddr.String())
	cluster.RUnlock()
	if server != nil {
		return server
	}
	return newServer(addr, tcpaddr, cluster.sync, cluster.dial)
}

func resolveAddr(addr string) (*net.TCPAddr, error) {
	tcpaddr, err := net.ResolveTCPAddr("tcp", addr)
	if err != nil {
		log("SYNC Failed to resolve ", addr, ": ", err.Error())
		return nil, err
	}
	if tcpaddr.String() != addr {
		debug("SYNC Address ", addr, " resolved as ", tcpaddr.String())
	}
	return tcpaddr, nil
}

type pendingAdd struct {
	server *mongoServer
	info   *mongoServerInfo
}

func (cluster *mongoCluster) syncServersIteration(direct bool) {
	log("SYNC Starting full topology synchronization...")

	var wg sync.WaitGroup
	var m sync.Mutex
	notYetAdded := make(map[string]pendingAdd)
	addIfFound := make(map[string]bool)
	seen := make(map[string]bool)
	syncKind := partialSync

	var spawnSync func(addr string, byMaster bool)
	spawnSync = func(addr string, byMaster bool) {
		wg.Add(1)
		go func() {
			defer wg.Done()

			tcpaddr, err := resolveAddr(addr)
			if err != nil {
				log("SYNC Failed to start sync of ", addr, ": ", err.Error())
				return
			}
			resolvedAddr := tcpaddr.String()

			m.Lock()
			if byMaster {
				if pending, ok := notYetAdded[resolvedAddr]; ok {
					delete(notYetAdded, resolvedAddr)
					m.Unlock()
					cluster.addServer(pending.server, pending.info, completeSync)
					return
				}
				addIfFound[resolvedAddr] = true
			}
			if seen[resolvedAddr] {
				m.Unlock()
				return
			}
			seen[resolvedAddr] = true
			m.Unlock()

			server := cluster.server(addr, tcpaddr)
			info, hosts, err := cluster.syncServer(server)
			if err != nil {
				cluster.removeServer(server)
				return
			}

			m.Lock()
			add := direct || info.Master || addIfFound[resolvedAddr]
			if add {
				syncKind = completeSync
			} else {
				notYetAdded[resolvedAddr] = pendingAdd{server, info}
			}
			m.Unlock()
			if add {
				cluster.addServer(server, info, completeSync)
			}
			if !direct {
				for _, addr := range hosts {
					spawnSync(addr, info.Master)
				}
			}
		}()
	}

	knownAddrs := cluster.getKnownAddrs()
	for _, addr := range knownAddrs {
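		// Seeds are probed with byMaster == false: a server discovered this
		// way is held in notYetAdded until a master reports it, unless the
		// connection is direct or the server is itself a master.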
		spawnSync(addr, false)
	}
	wg.Wait()

	if syncKind == completeSync {
		logf("SYNC Synchronization was complete (got data from primary).")
		for _, pending := range notYetAdded {
			cluster.removeServer(pending.server)
		}
	} else {
		logf("SYNC Synchronization was partial (cannot talk to primary).")
		for _, pending := range notYetAdded {
			cluster.addServer(pending.server, pending.info, partialSync)
		}
	}

	cluster.Lock()
	ml := cluster.masters.Len()
	logf("SYNC Synchronization completed: %d master(s) and %d slave(s) alive.", ml, cluster.servers.Len()-ml)

	// Update dynamic seeds, but only if we have any good servers. Otherwise,
	// leave them alone for better chances of a successful sync in the future.
	if syncKind == completeSync {
		dynaSeeds := make([]string, cluster.servers.Len())
		for i, server := range cluster.servers.Slice() {
			dynaSeeds[i] = server.Addr
		}
		cluster.dynaSeeds = dynaSeeds
		debugf("SYNC New dynamic seeds: %#v\n", dynaSeeds)
	}
	cluster.Unlock()
}

var socketsPerServer = 4096

// AcquireSocket returns a socket to a server in the cluster. If slaveOk is
// true, it will attempt to return a socket to a slave server. If it is
// false, the socket will necessarily be to a master server.
func (cluster *mongoCluster) AcquireSocket(slaveOk bool, syncTimeout time.Duration, socketTimeout time.Duration, serverTags []bson.D) (s *mongoSocket, err error) {
	var started time.Time
	warnedLimit := false
	for {
		cluster.RLock()
		for {
			ml := cluster.masters.Len()
			sl := cluster.servers.Len()
			debugf("Cluster has %d known masters and %d known slaves.", ml, sl-ml)
			if ml > 0 || slaveOk && sl > 0 {
				break
			}
			if started.IsZero() {
				started = time.Now() // Initialize after fast path above.
			} else if syncTimeout != 0 && started.Before(time.Now().Add(-syncTimeout)) {
				cluster.RUnlock()
				return nil, errors.New("no reachable servers")
			}
			log("Waiting for servers to synchronize...")
			cluster.syncServers()

			// Remember: this will release and reacquire the lock.
			cluster.serverSynced.Wait()
		}

		var server *mongoServer
		if slaveOk {
			server = cluster.servers.BestFit(serverTags)
		} else {
			server = cluster.masters.BestFit(nil)
		}
		cluster.RUnlock()

		if server == nil {
			// Must have failed the requested tags. Sleep to avoid spinning.
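			// BestFit returned nil, meaning no live server matched; back off
			// briefly before retrying the selection rather than busy-looping.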
			time.Sleep(100 * time.Millisecond)
			continue
		}

		s, abended, err := server.AcquireSocket(socketsPerServer, socketTimeout)
		if err == errSocketLimit {
			if !warnedLimit {
				warnedLimit = true
				log("WARNING: Per-server connection limit reached.")
			}
			time.Sleep(100 * time.Millisecond)
			continue
		}
		if err != nil {
			cluster.removeServer(server)
			cluster.syncServers()
			continue
		}
		if abended && !slaveOk {
			var result isMasterResult
			err := cluster.isMaster(s, &result)
			if err != nil || !result.IsMaster {
				logf("Cannot confirm server %s as master (%v)", server.Addr, err)
				s.Release()
				cluster.syncServers()
				time.Sleep(100 * time.Millisecond)
				continue
			}
		}
		return s, nil
	}
	panic("unreached")
}

// CacheIndex records whether the index identified by cacheKey is known to
// exist; exists == false removes the entry instead.
func (cluster *mongoCluster) CacheIndex(cacheKey string, exists bool) {
	cluster.Lock()
	if cluster.cachedIndex == nil {
		cluster.cachedIndex = make(map[string]bool)
	}
	if exists {
		cluster.cachedIndex[cacheKey] = true
	} else {
		delete(cluster.cachedIndex, cacheKey)
	}
	cluster.Unlock()
}

// HasCachedIndex reports whether cacheKey was previously recorded via CacheIndex.
func (cluster *mongoCluster) HasCachedIndex(cacheKey string) (result bool) {
	cluster.RLock()
	if cluster.cachedIndex != nil {
		result = cluster.cachedIndex[cacheKey]
	}
	cluster.RUnlock()
	return
}

// ResetIndexCache drops all cached index entries.
func (cluster *mongoCluster) ResetIndexCache() {
	cluster.Lock()
	cluster.cachedIndex = make(map[string]bool)
	cluster.Unlock()
}
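// A minimal usage sketch of the index cache (not part of this file), assuming
// a caller along the lines of Session.EnsureIndex wants to skip redundant
// index creation. The cacheKey format shown is a hypothetical illustration,
// not necessarily the key format mgo actually uses:
//
//	cacheKey := "mydb.mycoll:name_1" // hypothetical key format
//	if !cluster.HasCachedIndex(cacheKey) {
//		// ... run the index creation command against a master ...
//		cluster.CacheIndex(cacheKey, true) // remember it exists
//	}
//	// On index removal or failure, invalidate with:
//	// cluster.CacheIndex(cacheKey, false)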