github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/third_party/labix.org/v2/mgo/session.go

     1  // mgo - MongoDB driver for Go
     2  //
     3  // Copyright (c) 2010-2012 - Gustavo Niemeyer <gustavo@niemeyer.net>
     4  //
     5  // All rights reserved.
     6  //
     7  // Redistribution and use in source and binary forms, with or without
     8  // modification, are permitted provided that the following conditions are met:
     9  //
    10  // 1. Redistributions of source code must retain the above copyright notice, this
    11  //    list of conditions and the following disclaimer.
    12  // 2. Redistributions in binary form must reproduce the above copyright notice,
    13  //    this list of conditions and the following disclaimer in the documentation
    14  //    and/or other materials provided with the distribution.
    15  //
    16  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
    17  // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    18  // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    19  // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
    20  // ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    21  // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    22  // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    23  // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    24  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    25  // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    26  
    27  package mgo
    28  
    29  import (
    30  	"camlistore.org/third_party/labix.org/v2/mgo/bson"
    31  	"crypto/md5"
    32  	"encoding/hex"
    33  	"errors"
    34  	"fmt"
    35  	"math"
    36  	"net"
    37  	"reflect"
    38  	"sort"
    39  	"strconv"
    40  	"strings"
    41  	"sync"
    42  	"time"
    43  )
    44  
    45  type mode int
    46  
    47  const (
    48  	Eventual  mode = 0 // Reads may go to any slave; no ordering guarantees. See SetMode.
    49  	Monotonic mode = 1 // Reads go to a slave until the first write, then to the master. See SetMode.
    50  	Strong    mode = 2 // Reads and writes always go to the master on a single connection. See SetMode.
    51  )
    52  
    53  // When changing the Session type, check if newSession and copySession
    54  // need to be updated too.
    55  
    56  type Session struct {
    57  	m            sync.RWMutex
    58  	cluster_     *mongoCluster
    59  	slaveSocket  *mongoSocket
    60  	masterSocket *mongoSocket
    61  	slaveOk      bool
    62  	consistency  mode
    63  	queryConfig  query
    64  	safeOp       *queryOp
    65  	syncTimeout  time.Duration
    66  	sockTimeout  time.Duration
    67  	defaultdb    string
    68  	dialAuth     *authInfo
    69  	auth         []authInfo
    70  }
    71  
    72  type Database struct {
    73  	Session *Session
    74  	Name    string
    75  }
    76  
    77  type Collection struct {
    78  	Database *Database
    79  	Name     string // "collection"
    80  	FullName string // "db.collection"
    81  }
    82  
    83  type Query struct {
    84  	m       sync.Mutex
    85  	session *Session
    86  	query   // Enables default settings in session.
    87  }
    88  
    89  type query struct {
    90  	op       queryOp
    91  	prefetch float64
    92  	limit    int32
    93  }
    94  
    95  type getLastError struct {
    96  	CmdName  int         "getLastError"
    97  	W        interface{} "w,omitempty"
    98  	WTimeout int         "wtimeout,omitempty"
    99  	FSync    bool        "fsync,omitempty"
   100  	J        bool        "j,omitempty"
   101  }
   102  
   103  type Iter struct {
   104  	m              sync.Mutex
   105  	gotReply       sync.Cond
   106  	session        *Session
   107  	server         *mongoServer
   108  	docData        queue
   109  	err            error
   110  	op             getMoreOp
   111  	prefetch       float64
   112  	limit          int32
   113  	docsToReceive  int
   114  	docsBeforeMore int
   115  	timeout        time.Duration
   116  	timedout       bool
   117  }
   118  
   119  var ErrNotFound = errors.New("not found")
   120  
   121  const defaultPrefetch = 0.25
   122  
   123  // Dial establishes a new session to the cluster identified by the given seed
   124  // server(s). The session will enable communication with all of the servers in
   125  // the cluster, so the seed servers are used only to find out about the cluster
   126  // topology.
   127  //
   128  // Dial will timeout after 10 seconds if a server isn't reached. The returned
   129  // session will timeout operations after one minute by default if servers
   130  // aren't available. To customize the timeout, see DialWithTimeout,
   131  // SetSyncTimeout, and SetSocketTimeout.
   132  //
   133  // This method is generally called just once for a given cluster.  Further
   134  // sessions to the same cluster are then established using the New or Copy
   135  // methods on the obtained session. This will make them share the underlying
   136  // cluster, and manage the pool of connections appropriately.
   137  //
   138  // Once the session is not useful anymore, Close must be called to release the
   139  // resources appropriately.
   140  //
   141  // The seed servers must be provided in the following format:
   142  //
   143  //     [mongodb://][user:pass@]host1[:port1][,host2[:port2],...][/database][?options]
   144  //
   145  // For example, it may be as simple as:
   146  //
   147  //     localhost
   148  //
   149  // Or more involved like:
   150  //
   151  //     mongodb://myuser:mypass@localhost:40001,otherhost:40001/mydb
   152  //
   153  // If the port number is not provided for a server, it defaults to 27017.
   154  //
   155  // The username and password provided in the URL will be used to authenticate
   156  // into the database named after the slash at the end of the host names, or
   157  // into the "admin" database if none is provided.  The authentication information
   158  // will persist in sessions obtained through the New method as well.
   159  //
   160  // The following connection options are supported after the question mark:
   161  //
   162  //     connect=direct
   163  //
   164  //         This option will disable the automatic replica set server
   165  //         discovery logic, and will only use the servers provided.
   166  //         This enables forcing the communication with a specific
   167  //         server or set of servers (even if they are slaves).  Note
   168  //         that to talk to a slave you'll need to relax the consistency
   169  //         requirements using a Monotonic or Eventual mode via SetMode.
   170  //
   171  // Relevant documentation:
   172  //
   173  //     http://www.mongodb.org/display/DOCS/Connections
   174  //
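        // For illustration, a typical call looks like this (the URL and error
        // handling are placeholders, not a prescription):
        //
        //     session, err := mgo.Dial("mongodb://localhost:27017/mydb")
        //     if err != nil {
        //         panic(err)
        //     }
        //     defer session.Close()
        //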
   175  func Dial(url string) (*Session, error) {
   176  	session, err := DialWithTimeout(url, 10*time.Second)
   177  	if err == nil {
   178  		session.SetSyncTimeout(1 * time.Minute)
   179  		session.SetSocketTimeout(1 * time.Minute)
   180  	}
   181  	return session, err
   182  }
   183  
   184  // DialWithTimeout works like Dial, but uses timeout as the amount of time to
   185  // wait for a server to respond when first connecting and also on follow up
   186  // operations in the session. If timeout is zero, the call may block
   187  // forever waiting for a connection to be made.
   188  //
   189  // See SetSyncTimeout for customizing the timeout for the session.
   190  func DialWithTimeout(url string, timeout time.Duration) (*Session, error) {
   191  	uinfo, err := parseURL(url)
   192  	if err != nil {
   193  		return nil, err
   194  	}
   195  	direct := false
   196  	for k, v := range uinfo.options {
   197  		switch k {
   198  		case "connect":
   199  			if v == "direct" {
   200  				direct = true
   201  				break
   202  			}
   203  			if v == "replicaSet" {
   204  				break
   205  			}
   206  			fallthrough
   207  		default:
   208  			return nil, errors.New("Unsupported connection URL option: " + k + "=" + v)
   209  		}
   210  	}
   211  	info := DialInfo{
   212  		Addrs:    uinfo.addrs,
   213  		Direct:   direct,
   214  		Timeout:  timeout,
   215  		Username: uinfo.user,
   216  		Password: uinfo.pass,
   217  		Database: uinfo.db,
   218  	}
   219  	return DialWithInfo(&info)
   220  }
   221  
   222  // DialInfo holds options for establishing a session with a MongoDB cluster.
   223  // To use a URL, see the Dial function.
   224  type DialInfo struct {
   225  	// Addrs holds the addresses for the seed servers.
   226  	Addrs []string
   227  
   228  	// Direct informs whether to establish connections only with the
   229  	// specified seed servers, or to obtain information for the whole
   230  	// cluster and establish connections with further servers too.
   231  	Direct bool
   232  
   233  	// Timeout is the amount of time to wait for a server to respond when
   234  	// first connecting and on follow up operations in the session. If
   235  	// timeout is zero, the call may block forever waiting for a connection
   236  	// to be established.
   237  	Timeout time.Duration
   238  
   239  	// Database is the database name used during the initial authentication.
   240  	// If set, the value is also returned as the default result from the
   241  	// Session.DB method, in place of "test".
   242  	Database string
   243  
   244  	// Username and Password inform the credentials for the initial
   245  	// authentication done against Database, if that is set,
   246  	// or the "admin" database otherwise. See the Session.Login method too.
   247  	Username string
   248  	Password string
   249  
   250  	// Dial optionally specifies the dial function for establishing
   251  	// connections with the MongoDB servers.
   252  	Dial func(addr net.Addr) (net.Conn, error)
   253  
   254  	// DialServer optionally specifies the dial function for establishing
   255  	// connections with the MongoDB servers.
   256  	//
   257  	// WARNING: This interface is experimental and may change.
   258  	DialServer func(addr *ServerAddr) (net.Conn, error)
   259  }
   260  
   261  // ServerAddr represents the address for establishing a connection to an
   262  // individual MongoDB server.
   263  //
   264  // WARNING: This interface is experimental and may change.
   265  type ServerAddr struct {
   266  	str string
   267  	tcp *net.TCPAddr
   268  }
   269  
   270  // String returns the address that was provided for the server before resolution.
   271  func (addr *ServerAddr) String() string {
   272  	return addr.str
   273  }
   274  
   275  // TCPAddr returns the resolved TCP address for the server.
   276  func (addr *ServerAddr) TCPAddr() *net.TCPAddr {
   277  	return addr.tcp
   278  }
   279  
   280  // DialWithInfo establishes a new session to the cluster identified by info.
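        //
        // For illustration, a session may be established without a URL as in the
        // sketch below (addresses, credentials, and timeout are placeholders):
        //
        //     info := &mgo.DialInfo{
        //         Addrs:    []string{"db1.example.com:27017", "db2.example.com:27017"},
        //         Timeout:  10 * time.Second,
        //         Database: "mydb",
        //         Username: "myuser",
        //         Password: "mypass",
        //     }
        //     session, err := mgo.DialWithInfo(info)
        //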
   281  func DialWithInfo(info *DialInfo) (*Session, error) {
   282  	addrs := make([]string, len(info.Addrs))
   283  	for i, addr := range info.Addrs {
   284  		p := strings.LastIndexAny(addr, "]:")
   285  		if p == -1 || addr[p] != ':' {
   286  			// XXX This is untested. The test suite doesn't use the standard port.
   287  			addr += ":27017"
   288  		}
   289  		addrs[i] = addr
   290  	}
   291  	cluster := newCluster(addrs, info.Direct, dialer{info.Dial, info.DialServer})
   292  	session := newSession(Eventual, cluster, info.Timeout)
   293  	session.defaultdb = info.Database
   294  	if session.defaultdb == "" {
   295  		session.defaultdb = "test"
   296  	}
   297  	if info.Username != "" {
   298  		db := info.Database
   299  		if db == "" {
   300  			db = "admin"
   301  		}
   302  		session.dialAuth = &authInfo{db, info.Username, info.Password}
   303  		session.auth = []authInfo{*session.dialAuth}
   304  	}
   305  	cluster.Release()
   306  
   307  	// People get confused when we return a session that is not actually
   308  	// established to any servers yet (e.g. what if url was wrong). So,
   309  	// ping the server to ensure there's someone there, and abort if it
   310  	// fails.
   311  	if err := session.Ping(); err != nil {
   312  		session.Close()
   313  		return nil, err
   314  	}
   315  	session.SetMode(Strong, true)
   316  	return session, nil
   317  }
   318  
   319  func isOptSep(c rune) bool {
   320  	return c == ';' || c == '&'
   321  }
   322  
   323  type urlInfo struct {
   324  	addrs   []string
   325  	user    string
   326  	pass    string
   327  	db      string
   328  	options map[string]string
   329  }
   330  
   331  func parseURL(url string) (*urlInfo, error) {
   332  	if strings.HasPrefix(url, "mongodb://") {
   333  		url = url[10:]
   334  	}
   335  	info := &urlInfo{options: make(map[string]string)}
   336  	if c := strings.Index(url, "?"); c != -1 {
   337  		for _, pair := range strings.FieldsFunc(url[c+1:], isOptSep) {
   338  			l := strings.SplitN(pair, "=", 2)
   339  			if len(l) != 2 || l[0] == "" || l[1] == "" {
   340  				return nil, errors.New("Connection option must be key=value: " + pair)
   341  			}
   342  			info.options[l[0]] = l[1]
   343  		}
   344  		url = url[:c]
   345  	}
   346  	if c := strings.Index(url, "@"); c != -1 {
   347  		pair := strings.SplitN(url[:c], ":", 2)
   348  		if len(pair) != 2 || pair[0] == "" {
   349  			return nil, errors.New("Credentials must be provided as user:pass@host")
   350  		}
   351  		info.user = pair[0]
   352  		info.pass = pair[1]
   353  		url = url[c+1:]
   354  	}
   355  	if c := strings.Index(url, "/"); c != -1 {
   356  		info.db = url[c+1:]
   357  		url = url[:c]
   358  	}
   359  	info.addrs = strings.Split(url, ",")
   360  	return info, nil
   361  }
   362  
   363  func newSession(consistency mode, cluster *mongoCluster, timeout time.Duration) (session *Session) {
   364  	cluster.Acquire()
   365  	session = &Session{cluster_: cluster, syncTimeout: timeout, sockTimeout: timeout}
   366  	debugf("New session %p on cluster %p", session, cluster)
   367  	session.SetMode(consistency, true)
   368  	session.SetSafe(&Safe{})
   369  	session.queryConfig.prefetch = defaultPrefetch
   370  	return session
   371  }
   372  
   373  func copySession(session *Session, keepAuth bool) (s *Session) {
   374  	cluster := session.cluster()
   375  	cluster.Acquire()
   376  	if session.masterSocket != nil {
   377  		session.masterSocket.Acquire()
   378  	}
   379  	if session.slaveSocket != nil {
   380  		session.slaveSocket.Acquire()
   381  	}
   382  	var auth []authInfo
   383  	if keepAuth {
   384  		auth = make([]authInfo, len(session.auth))
   385  		copy(auth, session.auth)
   386  	} else if session.dialAuth != nil {
   387  		auth = []authInfo{*session.dialAuth}
   388  	}
   389  	scopy := *session
   390  	scopy.m = sync.RWMutex{}
   391  	scopy.auth = auth
   392  	s = &scopy
   393  	debugf("New session %p on cluster %p (copy from %p)", s, cluster, session)
   394  	return s
   395  }
   396  
   397  // LiveServers returns a list of server addresses which are
   398  // currently known to be alive.
   399  func (s *Session) LiveServers() (addrs []string) {
   400  	s.m.RLock()
   401  	addrs = s.cluster().LiveServers()
   402  	s.m.RUnlock()
   403  	return addrs
   404  }
   405  
   406  // DB returns a value representing the named database. If name
   407  // is empty, the database name provided in the dialed URL is
   408  // used instead. If that is also empty, "test" is used as a
   409  // fallback in a way equivalent to the mongo shell.
   410  //
   411  // Creating this value is a very lightweight operation, and
   412  // involves no network communication.
   413  func (s *Session) DB(name string) *Database {
   414  	if name == "" {
   415  		name = s.defaultdb
   416  	}
   417  	return &Database{s, name}
   418  }
   419  
   420  // C returns a value representing the named collection.
   421  //
   422  // Creating this value is a very lightweight operation, and
   423  // involves no network communication.
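        //
        // For example (the database and collection names are illustrative):
        //
        //     people := session.DB("mydb").C("people")
        //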
   424  func (db *Database) C(name string) *Collection {
   425  	return &Collection{db, name, db.Name + "." + name}
   426  }
   427  
   428  // With returns a copy of db that uses session s.
   429  func (db *Database) With(s *Session) *Database {
   430  	newdb := *db
   431  	newdb.Session = s
   432  	return &newdb
   433  }
   434  
   435  // With returns a copy of c that uses session s.
   436  func (c *Collection) With(s *Session) *Collection {
   437  	newdb := *c.Database
   438  	newdb.Session = s
   439  	newc := *c
   440  	newc.Database = &newdb
   441  	return &newc
   442  }
   443  
   444  // GridFS returns a GridFS value representing collections in db that
   445  // follow the standard GridFS specification.
   446  // The provided prefix (sometimes known as root) will determine which
   447  // collections to use, and is usually set to "fs" when there is a
   448  // single GridFS in the database.
   449  //
   450  // See the GridFS Create, Open, and OpenId methods for more details.
   451  //
   452  // Relevant documentation:
   453  //
   454  //     http://www.mongodb.org/display/DOCS/GridFS
   455  //     http://www.mongodb.org/display/DOCS/GridFS+Tools
   456  //     http://www.mongodb.org/display/DOCS/GridFS+Specification
   457  //
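        // For illustration, opening the default "fs" prefix and creating a file
        // might look like this (the file name is a placeholder):
        //
        //     file, err := db.GridFS("fs").Create("myfile.txt")
        //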
   458  func (db *Database) GridFS(prefix string) *GridFS {
   459  	return newGridFS(db, prefix)
   460  }
   461  
   462  // Run issues the provided command against the database and unmarshals
   463  // its result in the respective argument. The cmd argument may be either
   464  // a string with the command name itself, in which case an empty document of
   465  // the form bson.M{cmd: 1} will be used, or it may be a full command document.
   466  //
   467  // Note that MongoDB considers the first marshalled key as the command
   468  // name, so when providing a command with options, it's important to
   469  // use an ordering-preserving document, such as a struct value or an
   470  // instance of bson.D.  For instance:
   471  //
   472  //     db.Run(bson.D{{"create", "mycollection"}, {"size", 1024}})
   473  //
   474  // For privileged commands typically run against the "admin" database, see
   475  // the Run method in the Session type.
   476  //
   477  // Relevant documentation:
   478  //
   479  //     http://www.mongodb.org/display/DOCS/Commands
   480  //     http://www.mongodb.org/display/DOCS/List+of+Database+CommandSkips
   481  //
   482  func (db *Database) Run(cmd interface{}, result interface{}) error {
   483  	if name, ok := cmd.(string); ok {
   484  		cmd = bson.D{{name, 1}}
   485  	}
   486  	return db.C("$cmd").Find(cmd).One(result)
   487  }
   488  
   489  // Login authenticates against MongoDB with the provided credentials.  The
   490  // authentication is valid for the whole session and will stay valid until
   491  // Logout is explicitly called for the same database, or the session is
   492  // closed.
   493  //
   494  // Concurrent Login calls will work correctly.
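        //
        // A short sketch (the database name and credentials are placeholders):
        //
        //     if err := session.DB("mydb").Login("myuser", "mypass"); err != nil {
        //         // authentication failed
        //     }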
   495  func (db *Database) Login(user, pass string) (err error) {
   496  	session := db.Session
   497  	dbname := db.Name
   498  
   499  	socket, err := session.acquireSocket(true)
   500  	if err != nil {
   501  		return err
   502  	}
   503  	defer socket.Release()
   504  
   505  	err = socket.Login(dbname, user, pass)
   506  	if err != nil {
   507  		return err
   508  	}
   509  
   510  	session.m.Lock()
   511  	defer session.m.Unlock()
   512  
   513  	for i := range session.auth {
   514  		if session.auth[i].db == dbname {
   515  			session.auth[i].user = user
   516  			session.auth[i].pass = pass
   517  			return nil
   518  		}
   519  	}
   520  	session.auth = append(session.auth, authInfo{dbname, user, pass})
   521  	return nil
   522  }
   523  
   524  func (s *Session) socketLogin(socket *mongoSocket) error {
   525  	for _, a := range s.auth {
   526  		if err := socket.Login(a.db, a.user, a.pass); err != nil {
   527  			return err
   528  		}
   529  	}
   530  	return nil
   531  }
   532  
   533  // Logout removes any established authentication credentials for the database.
   534  func (db *Database) Logout() {
   535  	session := db.Session
   536  	dbname := db.Name
   537  	session.m.Lock()
   538  	found := false
   539  	for i, a := range session.auth {
   540  		if a.db == dbname {
   541  			copy(session.auth[i:], session.auth[i+1:])
   542  			session.auth = session.auth[:len(session.auth)-1]
   543  			found = true
   544  			break
   545  		}
   546  	}
   547  	if found {
   548  		if session.masterSocket != nil {
   549  			session.masterSocket.Logout(dbname)
   550  		}
   551  		if session.slaveSocket != nil {
   552  			session.slaveSocket.Logout(dbname)
   553  		}
   554  	}
   555  	session.m.Unlock()
   556  }
   557  
   558  // LogoutAll removes all established authentication credentials for the session.
   559  func (s *Session) LogoutAll() {
   560  	s.m.Lock()
   561  	for _, a := range s.auth {
   562  		if s.masterSocket != nil {
   563  			s.masterSocket.Logout(a.db)
   564  		}
   565  		if s.slaveSocket != nil {
   566  			s.slaveSocket.Logout(a.db)
   567  		}
   568  	}
   569  	s.auth = s.auth[0:0]
   570  	s.m.Unlock()
   571  }
   572  
   573  // User represents a MongoDB user.
   574  //
   575  // Relevant documentation:
   576  //
   577  //     http://docs.mongodb.org/manual/reference/privilege-documents/
   578  //     http://docs.mongodb.org/manual/reference/user-privileges/
   579  //
   580  type User struct {
   581  	// Username is how the user identifies itself to the system.
   582  	Username string `bson:"user"`
   583  
   584  	// Password is the plaintext password for the user. If set,
   585  	// the UpsertUser method will hash it into PasswordHash and
   586  	// unset it before the user is added to the database.
   587  	Password string `bson:",omitempty"`
   588  
   589  	// PasswordHash is the MD5 hash of Username+":mongo:"+Password.
   590  	PasswordHash string `bson:"pwd,omitempty"`
   591  
   592  	// UserSource indicates where to look for this user's credentials.
   593  	// It may be set to a database name, or to "$external" for
   594  	// consulting an external resource such as Kerberos. UserSource
   595  	// must not be set if Password or PasswordHash are present.
   596  	UserSource string `bson:"userSource,omitempty"`
   597  
   598  	// Roles indicates the set of roles the user will be provided.
   599  	// See the Role constants.
   600  	Roles []Role `bson:"roles"`
   601  
   602  	// OtherDBRoles allows assigning roles in other databases from
   603  	// user documents inserted in the admin database. This field
   604  	// only works in the admin database.
   605  	OtherDBRoles map[string][]Role `bson:"otherDBRoles,omitempty"`
   606  }
   607  
   608  type Role string
   609  
   610  const (
   611  	// Relevant documentation:
   612  	//
   613  	//     http://docs.mongodb.org/manual/reference/user-privileges/
   614  	//
   615  	RoleRead         Role = "read"
   616  	RoleReadAny      Role = "readAnyDatabase"
   617  	RoleReadWrite    Role = "readWrite"
   618  	RoleReadWriteAny Role = "readWriteAnyDatabase"
   619  	RoleDBAdmin      Role = "dbAdmin"
   620  	RoleDBAdminAny   Role = "dbAdminAnyDatabase"
   621  	RoleUserAdmin    Role = "userAdmin"
   622  	RoleUserAdminAny Role = "userAdminAnyDatabase"
   623  	RoleClusterAdmin Role = "clusterAdmin"
   624  )
   625  
   626  // UpsertUser updates the authentication credentials and the roles for
   627  // a MongoDB user within the db database. If the named user doesn't exist
   628  // it will be created.
   629  //
   630  // This method should only be used from MongoDB 2.4 and on. For older
   631  // MongoDB releases, use the obsolete AddUser method instead.
   632  //
   633  // Relevant documentation:
   634  //
   635  //     http://docs.mongodb.org/manual/reference/user-privileges/
   636  //     http://docs.mongodb.org/manual/reference/privilege-documents/
   637  //
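        // For example, the following sketch creates or updates a read-write user
        // (the user name and password are placeholders):
        //
        //     err := db.UpsertUser(&mgo.User{
        //         Username: "myuser",
        //         Password: "mypass",
        //         Roles:    []mgo.Role{mgo.RoleReadWrite},
        //     })
        //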
   638  func (db *Database) UpsertUser(user *User) error {
   639  	if user.Username == "" {
   640  		return fmt.Errorf("user has no Username")
   641  	}
   642  	if user.Password != "" {
   643  		psum := md5.New()
   644  		psum.Write([]byte(user.Username + ":mongo:" + user.Password))
   645  		user.PasswordHash = hex.EncodeToString(psum.Sum(nil))
   646  		user.Password = ""
   647  	}
   648  	if user.PasswordHash != "" && user.UserSource != "" {
   649  		return fmt.Errorf("user has both Password/PasswordHash and UserSource set")
   650  	}
   651  	if len(user.OtherDBRoles) > 0 && db.Name != "admin" {
   652  		return fmt.Errorf("user with OtherDBRoles is only supported in admin database")
   653  	}
   654  	var unset bson.D
   655  	if user.PasswordHash == "" {
   656  		unset = append(unset, bson.DocElem{"pwd", 1})
   657  	}
   658  	if user.UserSource == "" {
   659  		unset = append(unset, bson.DocElem{"userSource", 1})
   660  	}
   661  	// user.Roles is always sent, as it's the way MongoDB distinguishes
   662  	// old-style documents from new-style documents.
   663  	if len(user.OtherDBRoles) == 0 {
   664  		unset = append(unset, bson.DocElem{"otherDBRoles", 1})
   665  	}
   666  	c := db.C("system.users")
   667  	_, err := c.Upsert(bson.D{{"user", user.Username}}, bson.D{{"$unset", unset}, {"$set", user}})
   668  	return err
   669  }
   670  
   671  // AddUser creates or updates the authentication credentials of user within
   672  // the db database.
   673  //
   674  // This method is obsolete and should only be used with MongoDB 2.2 or
   675  // earlier. For MongoDB 2.4 and on, use UpsertUser instead.
   676  func (db *Database) AddUser(user, pass string, readOnly bool) error {
   677  	psum := md5.New()
   678  	psum.Write([]byte(user + ":mongo:" + pass))
   679  	digest := hex.EncodeToString(psum.Sum(nil))
   680  	c := db.C("system.users")
   681  	_, err := c.Upsert(bson.M{"user": user}, bson.M{"$set": bson.M{"user": user, "pwd": digest, "readOnly": readOnly}})
   682  	return err
   683  }
   684  
   685  // RemoveUser removes the authentication credentials of user from the database.
   686  func (db *Database) RemoveUser(user string) error {
   687  	c := db.C("system.users")
   688  	return c.Remove(bson.M{"user": user})
   689  }
   690  
   691  type indexSpec struct {
   692  	Name, NS       string
   693  	Key            bson.D
   694  	Unique         bool ",omitempty"
   695  	DropDups       bool "dropDups,omitempty"
   696  	Background     bool ",omitempty"
   697  	Sparse         bool ",omitempty"
   698  	Bits, Min, Max int  ",omitempty"
   699  	ExpireAfter    int  "expireAfterSeconds,omitempty"
   700  }
   701  
   702  type Index struct {
   703  	Key        []string // Index key fields; prefix name with dash (-) for descending order
   704  	Unique     bool     // Prevent two documents from having the same index key
   705  	DropDups   bool     // Drop documents with the same index key as a previously indexed one
   706  	Background bool     // Build index in background and return immediately
   707  	Sparse     bool     // Only index documents containing the Key fields
   708  
   709  	ExpireAfter time.Duration // Periodically delete docs with indexed time.Time older than that.
   710  
   711  	Name string // Index name, computed by EnsureIndex
   712  
   713  	Bits, Min, Max int // Properties for spatial indexes
   714  }
   715  
   716  func parseIndexKey(key []string) (name string, realKey bson.D, err error) {
   717  	var order interface{}
   718  	for _, field := range key {
   719  		raw := field
   720  		if name != "" {
   721  			name += "_"
   722  		}
   723  		var kind string
   724  		if field != "" {
   725  			if field[0] == '$' {
   726  				if c := strings.Index(field, ":"); c > 1 && c < len(field)-1 {
   727  					kind = field[1:c]
   728  					field = field[c+1:]
   729  					name += field + "_" + kind
   730  				}
   731  			}
   732  			switch field[0] {
   733  			case '$':
   734  				// Logic above failed. Reset and error.
   735  				field = ""
   736  			case '@':
   737  				order = "2d"
   738  				field = field[1:]
   739  				// The shell used to render this field as key_ instead of key_2d,
   740  				// and mgo followed suit. This has been fixed in recent server
   741  				// releases, and mgo followed as well.
   742  				name += field + "_2d"
   743  			case '-':
   744  				order = -1
   745  				field = field[1:]
   746  				name += field + "_-1"
   747  			case '+':
   748  				field = field[1:]
   749  				fallthrough
   750  			default:
   751  				if kind == "" {
   752  					order = 1
   753  					name += field + "_1"
   754  				} else {
   755  					order = kind
   756  				}
   757  			}
   758  		}
   759  		if field == "" || kind != "" && order != kind {
   760  			return "", nil, fmt.Errorf(`Invalid index key: want "[$<kind>:][-]<field name>", got %q`, raw)
   761  		}
   762  		realKey = append(realKey, bson.DocElem{field, order})
   763  	}
   764  	if name == "" {
   765  		return "", nil, errors.New("Invalid index key: no fields provided")
   766  	}
   767  	return
   768  }
   769  
   770  // EnsureIndexKey ensures an index with the given key exists, creating it
   771  // if necessary.
   772  //
   773  // This example:
   774  //
   775  //     err := collection.EnsureIndexKey("a", "b")
   776  //
   777  // Is equivalent to:
   778  //
   779  //     err := collection.EnsureIndex(mgo.Index{Key: []string{"a", "b"}})
   780  //
   781  // See the EnsureIndex method for more details.
   782  func (c *Collection) EnsureIndexKey(key ...string) error {
   783  	return c.EnsureIndex(Index{Key: key})
   784  }
   785  
   786  // EnsureIndex ensures an index with the given key exists, creating it with
   787  // the provided parameters if necessary.
   788  //
   789  // Once EnsureIndex returns successfully, following requests for the same index
   790  // will not contact the server unless Collection.DropIndex is used to drop the
   791  // same index, or Session.ResetIndexCache is called.
   792  //
   793  // For example:
   794  //
   795  //     index := Index{
   796  //         Key: []string{"lastname", "firstname"},
   797  //         Unique: true,
   798  //         DropDups: true,
   799  //         Background: true, // See notes.
   800  //         Sparse: true,
   801  //     }
   802  //     err := collection.EnsureIndex(index)
   803  //
   804  // The Key value determines which fields compose the index. The index ordering
   805  // will be ascending by default.  To obtain an index with a descending order,
   806  // the field name should be prefixed by a dash (e.g. []string{"-time"}).
   807  //
   808  // If Unique is true, the index must necessarily contain only a single
   809  // document per Key.  With DropDups set to true, documents with the same key
   810  // as a previously indexed one will be dropped rather than an error returned.
   811  //
   812  // If Background is true, other connections will be allowed to proceed using
   813  // the collection without the index while it's being built. Note that the
   814  // session executing EnsureIndex will be blocked for as long as it takes for
   815  // the index to be built.
   816  //
   817  // If Sparse is true, only documents containing the provided Key fields will be
   818  // included in the index.  When using a sparse index for sorting, only indexed
   819  // documents will be returned.
   820  //
   821  // If ExpireAfter is non-zero, the server will periodically scan the collection
   822  // and remove documents containing an indexed time.Time field with a value
   823  // older than ExpireAfter. See the documentation for details:
   824  //
   825  //     http://docs.mongodb.org/manual/tutorial/expire-data
   826  //
   827  // Other kinds of indexes are also supported through that API. Here is an example:
   828  //
   829  //     index := Index{
   830  //         Key: []string{"$2d:loc"},
   831  //         Bits: 26,
   832  //     }
   833  //     err := collection.EnsureIndex(index)
   834  //
   835  // The example above requests the creation of a "2d" index for the "loc" field.
   836  //
   837  // The 2D index bounds may be changed using the Min and Max attributes of the
   838  // Index value.  The default bound setting of (-180, 180) is suitable for
   839  // latitude/longitude pairs.
   840  //
   841  // The Bits parameter sets the precision of the 2D geohash values.  If not
   842  // provided, 26 bits are used, which is roughly equivalent to 1 foot of
   843  // precision for the default (-180, 180) index bounds.
   844  //
   845  // Relevant documentation:
   846  //
   847  //     http://www.mongodb.org/display/DOCS/Indexes
   848  //     http://www.mongodb.org/display/DOCS/Indexing+Advice+and+FAQ
   849  //     http://www.mongodb.org/display/DOCS/Indexing+as+a+Background+Operation
   850  //     http://www.mongodb.org/display/DOCS/Geospatial+Indexing
   851  //     http://www.mongodb.org/display/DOCS/Multikeys
   852  //
   853  func (c *Collection) EnsureIndex(index Index) error {
   854  	name, realKey, err := parseIndexKey(index.Key)
   855  	if err != nil {
   856  		return err
   857  	}
   858  
   859  	session := c.Database.Session
   860  	cacheKey := c.FullName + "\x00" + name
   861  	if session.cluster().HasCachedIndex(cacheKey) {
   862  		return nil
   863  	}
   864  
   865  	spec := indexSpec{
   866  		Name:        name,
   867  		NS:          c.FullName,
   868  		Key:         realKey,
   869  		Unique:      index.Unique,
   870  		DropDups:    index.DropDups,
   871  		Background:  index.Background,
   872  		Sparse:      index.Sparse,
   873  		Bits:        index.Bits,
   874  		Min:         index.Min,
   875  		Max:         index.Max,
   876  		ExpireAfter: int(index.ExpireAfter / time.Second),
   877  	}
   878  
   879  	session = session.Clone()
   880  	defer session.Close()
   881  	session.SetMode(Strong, false)
   882  	session.EnsureSafe(&Safe{})
   883  
   884  	db := c.Database.With(session)
   885  	err = db.C("system.indexes").Insert(&spec)
   886  	if err == nil {
   887  		session.cluster().CacheIndex(cacheKey, true)
   888  	}
   889  	session.Close()
   890  	return err
   891  }
   892  
   893  // DropIndex removes the index with key from the collection.
   894  //
   895  // The key value determines which fields compose the index. The index ordering
   896  // will be ascending by default.  To obtain an index with a descending order,
   897  // the field name should be prefixed by a dash (e.g. []string{"-time"}).
   898  //
   899  // For example:
   900  //
   901  //     err := collection.DropIndex("lastname", "firstname")
   902  //
   903  // See the EnsureIndex method for more details on indexes.
   904  func (c *Collection) DropIndex(key ...string) error {
   905  	name, _, err := parseIndexKey(key)
   906  	if err != nil {
   907  		return err
   908  	}
   909  
   910  	session := c.Database.Session
   911  	cacheKey := c.FullName + "\x00" + name
   912  	session.cluster().CacheIndex(cacheKey, false)
   913  
   914  	session = session.Clone()
   915  	defer session.Close()
   916  	session.SetMode(Strong, false)
   917  
   918  	db := c.Database.With(session)
   919  	result := struct {
   920  		ErrMsg string
   921  		Ok     bool
   922  	}{}
   923  	err = db.Run(bson.D{{"dropIndexes", c.Name}, {"index", name}}, &result)
   924  	if err != nil {
   925  		return err
   926  	}
   927  	if !result.Ok {
   928  		return errors.New(result.ErrMsg)
   929  	}
   930  	return nil
   931  }
   932  
   933  // Indexes returns a list of all indexes for the collection.
   934  //
   935  // For example, this snippet would drop all available indexes:
   936  //
   937  //   indexes, err := collection.Indexes()
   938  //   if err != nil {
   939  //       return err
   940  //   }
   941  //   for _, index := range indexes {
   942  //       err = collection.DropIndex(index.Key...)
   943  //       if err != nil {
   944  //           return err
   945  //       }
   946  //   }
   947  //
   948  // See the EnsureIndex method for more details on indexes.
   949  func (c *Collection) Indexes() (indexes []Index, err error) {
   950  	query := c.Database.C("system.indexes").Find(bson.M{"ns": c.FullName})
   951  	iter := query.Sort("name").Iter()
   952  	for {
   953  		var spec indexSpec
   954  		if !iter.Next(&spec) {
   955  			break
   956  		}
   957  		index := Index{
   958  			Name:        spec.Name,
   959  			Key:         simpleIndexKey(spec.Key),
   960  			Unique:      spec.Unique,
   961  			DropDups:    spec.DropDups,
   962  			Background:  spec.Background,
   963  			Sparse:      spec.Sparse,
   964  			ExpireAfter: time.Duration(spec.ExpireAfter) * time.Second,
   965  		}
   966  		indexes = append(indexes, index)
   967  	}
   968  	err = iter.Close()
   969  	return
   970  }
   971  
   972  func simpleIndexKey(realKey bson.D) (key []string) {
   973  	for i := range realKey {
   974  		field := realKey[i].Name
   975  		vi, ok := realKey[i].Value.(int)
   976  		if !ok {
   977  			vf, _ := realKey[i].Value.(float64)
   978  			vi = int(vf)
   979  		}
   980  		if vi == 1 {
   981  			key = append(key, field)
   982  			continue
   983  		}
   984  		if vi == -1 {
   985  			key = append(key, "-"+field)
   986  			continue
   987  		}
   988  		if vs, ok := realKey[i].Value.(string); ok {
   989  			key = append(key, "$"+vs+":"+field)
   990  			continue
   991  		}
   992  		panic("Got unknown index key type for field " + field)
   993  	}
   994  	return
   995  }
   996  
   997  // ResetIndexCache() clears the cache of previously ensured indexes.
   998  // Following requests to EnsureIndex will contact the server.
   999  func (s *Session) ResetIndexCache() {
  1000  	s.cluster().ResetIndexCache()
  1001  }
  1002  
  1003  // New creates a new session with the same parameters as the original
  1004  // session, including consistency, batch size, prefetching, safety mode,
  1005  // etc. The returned session will use sockets from the pool, so there's
  1006  // a chance that writes just performed in another session may not yet
  1007  // be visible.
  1008  //
  1009  // Login information from the original session will not be copied over
  1010  // into the new session unless it was provided through the initial URL
  1011  // for the Dial function.
  1012  //
  1013  // See the Copy and Clone methods.
  1014  //
  1015  func (s *Session) New() *Session {
  1016  	s.m.Lock()
  1017  	scopy := copySession(s, false)
  1018  	s.m.Unlock()
  1019  	scopy.Refresh()
  1020  	return scopy
  1021  }
  1022  
  1023  // Copy works just like New, but preserves the exact authentication
  1024  // information from the original session.
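        //
        // A common pattern is to copy the session per unit of work and close the
        // copy when done, as sketched below (the database, collection, and value
        // are placeholders):
        //
        //     s := session.Copy()
        //     defer s.Close()
        //     err := s.DB("mydb").C("events").Insert(bson.M{"msg": "hello"})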
  1025  func (s *Session) Copy() *Session {
  1026  	s.m.Lock()
  1027  	scopy := copySession(s, true)
  1028  	s.m.Unlock()
  1029  	scopy.Refresh()
  1030  	return scopy
  1031  }
  1032  
  1033  // Clone works just like Copy, but also reuses the same socket as the original
  1034  // session, in case it had already reserved one due to its consistency
  1035  // guarantees.  This behavior ensures that writes performed in the old session
  1036  // are necessarily observed when using the new session, as long as it was a
  1037  // strong or monotonic session.  That said, it also means that long operations
  1038  // may cause other goroutines using the original session to wait.
  1039  func (s *Session) Clone() *Session {
  1040  	s.m.Lock()
  1041  	scopy := copySession(s, true)
  1042  	s.m.Unlock()
  1043  	return scopy
  1044  }
  1045  
  1046  // Close terminates the session.  It's a runtime error to use a session
  1047  // after it has been closed.
  1048  func (s *Session) Close() {
  1049  	s.m.Lock()
  1050  	if s.cluster_ != nil {
  1051  		debugf("Closing session %p", s)
  1052  		s.unsetSocket()
  1053  		s.cluster_.Release()
  1054  		s.cluster_ = nil
  1055  	}
  1056  	s.m.Unlock()
  1057  }
  1058  
  1059  func (s *Session) cluster() *mongoCluster {
  1060  	if s.cluster_ == nil {
  1061  		panic("Session already closed")
  1062  	}
  1063  	return s.cluster_
  1064  }
  1065  
  1066  // Refresh puts back any reserved sockets in use and restarts the consistency
  1067  // guarantees according to the current consistency setting for the session.
  1068  func (s *Session) Refresh() {
  1069  	s.m.Lock()
  1070  	s.slaveOk = s.consistency != Strong
  1071  	s.unsetSocket()
  1072  	s.m.Unlock()
  1073  }
  1074  
  1075  // SetMode changes the consistency mode for the session.
  1076  //
  1077  // In the Strong consistency mode reads and writes will always be made to
  1078  // the master server using a unique connection so that reads and writes are
  1079  // fully consistent, ordered, and observing the most up-to-date data.
  1080  // This offers the least benefits in terms of distributing load, but the
  1081  // most guarantees.  See also Monotonic and Eventual.
  1082  //
  1083  // In the Monotonic consistency mode reads may not be entirely up-to-date,
  1084  // but they will always see the history of changes moving forward, the data
  1085  // read will be consistent across sequential queries in the same session,
  1086  // and modifications made within the session will be observed in following
  1087  // queries (read-your-writes).
  1088  //
  1089  // In practice, the Monotonic mode is obtained by performing initial reads
  1090  // against a unique connection to an arbitrary slave, if one is available,
  1091  // and once the first write happens, the session connection is switched over
  1092  // to the master server.  This manages to distribute some of the reading
  1093  // load with slaves, while maintaining some useful guarantees.
  1094  //
  1095  // In the Eventual consistency mode reads will be made to any slave in the
  1096  // cluster, if one is available, and sequential reads will not necessarily
  1097  // be made with the same connection.  This means that data may be observed
  1098  // out of order.  Writes will of course be issued to the master, but
  1099  // independent writes in the same Eventual session may also be made with
  1100  // independent connections, so there are also no guarantees in terms of
  1101  // write ordering (no read-your-writes guarantees either).
  1102  //
  1103  // The Eventual mode is the fastest and most resource-friendly, but is
  1104  // also the one offering the least guarantees about ordering of the data
  1105  // read and written.
  1106  //
  1107  // If refresh is true, in addition to ensuring the session is in the given
  1108  // consistency mode, the consistency guarantees will also be reset (e.g.
  1109  // a Monotonic session will be allowed to read from slaves again).  This is
  1110  // equivalent to calling the Refresh function.
  1111  //
  1112  // Shifting between Monotonic and Strong modes will keep a previously
  1113  // reserved connection for the session unless refresh is true or the
  1114  // connection is unsuitable (to a slave server in a Strong session).
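        //
        // For example, a read-heavy session might be relaxed like this (choosing
        // Monotonic here is illustrative; pick the mode that fits the workload):
        //
        //     session.SetMode(mgo.Monotonic, true)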
  1115  func (s *Session) SetMode(consistency mode, refresh bool) {
  1116  	s.m.Lock()
  1117  	debugf("Session %p: setting mode %d with refresh=%v (master=%p, slave=%p)", s, consistency, refresh, s.masterSocket, s.slaveSocket)
  1118  	s.consistency = consistency
  1119  	if refresh {
  1120  		s.slaveOk = s.consistency != Strong
  1121  		s.unsetSocket()
  1122  	} else if s.consistency == Strong {
  1123  		s.slaveOk = false
  1124  	} else if s.masterSocket == nil {
  1125  		s.slaveOk = true
  1126  	}
  1127  	s.m.Unlock()
  1128  }
  1129  
  1130  // Mode returns the current consistency mode for the session.
  1131  func (s *Session) Mode() mode {
  1132  	s.m.RLock()
  1133  	mode := s.consistency
  1134  	s.m.RUnlock()
  1135  	return mode
  1136  }
  1137  
  1138  // SetSyncTimeout sets the amount of time an operation with this session
  1139  // will wait before returning an error in case a connection to a usable
  1140  // server can't be established. Set it to zero to wait forever. Sessions
  1141  // obtained through Dial default to one minute.
  1142  func (s *Session) SetSyncTimeout(d time.Duration) {
  1143  	s.m.Lock()
  1144  	s.syncTimeout = d
  1145  	s.m.Unlock()
  1146  }
  1147  
  1148  // SetSocketTimeout sets the amount of time to wait for a non-responding
  1149  // socket to the database before it is forcefully closed.
  1150  func (s *Session) SetSocketTimeout(d time.Duration) {
  1151  	s.m.Lock()
  1152  	s.sockTimeout = d
  1153  	if s.masterSocket != nil {
  1154  		s.masterSocket.SetTimeout(d)
  1155  	}
  1156  	if s.slaveSocket != nil {
  1157  		s.slaveSocket.SetTimeout(d)
  1158  	}
  1159  	s.m.Unlock()
  1160  }
  1161  
  1162  // SetCursorTimeout changes the standard timeout period that the server
  1163  // enforces on created cursors. The only supported value right now is
  1164  // 0, which disables the timeout. The standard server timeout is 10 minutes.
  1165  func (s *Session) SetCursorTimeout(d time.Duration) {
  1166  	s.m.Lock()
  1167  	if d == 0 {
  1168  		s.queryConfig.op.flags |= flagNoCursorTimeout
  1169  	} else {
  1170  		panic("SetCursorTimeout: only 0 (disable timeout) supported for now")
  1171  	}
  1172  	s.m.Unlock()
  1173  }
  1174  
  1175  // SetBatch sets the default batch size used when fetching documents from the
  1176  // database. It's possible to change this setting on a per-query basis as
  1177  // well, using the Query.Batch method.
  1178  //
  1179  // The default batch size is defined by the database itself.  As of this
  1180  // writing, MongoDB will use an initial size of min(100 docs, 4MB) on the
  1181  // first batch, and 4MB on remaining ones.
  1182  func (s *Session) SetBatch(n int) {
  1183  	if n == 1 {
  1184  		// Server interprets 1 as -1 and closes the cursor (!?)
  1185  		n = 2
  1186  	}
  1187  	s.m.Lock()
  1188  	s.queryConfig.op.limit = int32(n)
  1189  	s.m.Unlock()
  1190  }
  1191  
  1192  // SetPrefetch sets the default point at which the next batch of results will be
  1193  // requested.  When there are p*batch_size remaining documents cached in an
  1194  // Iter, the next batch will be requested in background. For instance, when
  1195  // using this:
  1196  //
  1197  //     session.SetBatch(200)
  1198  //     session.SetPrefetch(0.25)
  1199  //
  1200  // and there are only 50 documents cached in the Iter to be processed, the
  1201  // next batch of 200 will be requested. It's possible to change this setting on
  1202  // a per-query basis as well, using the Prefetch method of Query.
  1203  //
  1204  // The default prefetch value is 0.25.
  1205  func (s *Session) SetPrefetch(p float64) {
  1206  	s.m.Lock()
  1207  	s.queryConfig.prefetch = p
  1208  	s.m.Unlock()
  1209  }
  1210  
  1211  // See SetSafe for details on the Safe type.
  1212  type Safe struct {
  1213  	W        int    // Min # of servers to ack before success
  1214  	WMode    string // Write mode for MongoDB 2.0+ (e.g. "majority")
  1215  	WTimeout int    // Milliseconds to wait for W before timing out
  1216  	FSync    bool   // Should servers sync to disk before returning success
  1217  	J        bool   // Wait for next group commit if journaling; no effect otherwise
  1218  }
  1219  
  1220  // Safe returns the current safety mode for the session.
  1221  func (s *Session) Safe() (safe *Safe) {
  1222  	s.m.Lock()
  1223  	defer s.m.Unlock()
  1224  	if s.safeOp != nil {
  1225  		cmd := s.safeOp.query.(*getLastError)
  1226  		safe = &Safe{WTimeout: cmd.WTimeout, FSync: cmd.FSync, J: cmd.J}
  1227  		switch w := cmd.W.(type) {
  1228  		case string:
  1229  			safe.WMode = w
  1230  		case int:
  1231  			safe.W = w
  1232  		}
  1233  	}
  1234  	return
  1235  }
  1236  
  1237  // SetSafe changes the session safety mode.
  1238  //
  1239  // If the safe parameter is nil, the session is put in unsafe mode, and writes
  1240  // become fire-and-forget, without error checking.  The unsafe mode is faster
  1241  // since operations won't hold on waiting for a confirmation.
  1242  //
  1243  // If the safe parameter is not nil, any changing query (insert, update, ...)
  1244  // will be followed by a getLastError command with the specified parameters,
  1245  // to ensure the request was correctly processed.
  1246  //
  1247  // The safe.W parameter determines how many servers should confirm a write
  1248  // before the operation is considered successful.  If set to 0 or 1, the
  1249  // command will return as soon as the master is done with the request.
  1250  // If safe.WTimeout is greater than zero, it determines how many milliseconds
  1251  // to wait for the safe.W servers to respond before returning an error.
  1252  //
  1253  // Starting with MongoDB 2.0.0 the safe.WMode parameter can be used instead
  1254  // of W to request for richer semantics. If set to "majority" the server will
  1255  // wait for a majority of members from the replica set to respond before
  1256  // returning. Custom modes may also be defined within the server to create
  1257  // very detailed placement schemas. See the data awareness documentation in
  1258  // the links below for more details (note that MongoDB internally reuses the
  1259  // "w" field name for WMode).
  1260  //
  1261  // If safe.FSync is true and journaling is disabled, the servers will be
  1262  // forced to sync all files to disk immediately before returning. If the
  1263  // same option is true but journaling is enabled, the server will instead
  1264  // await for the next group commit before returning.
  1265  //
  1266  // Since MongoDB 2.0.0, the safe.J option can also be used instead of FSync
  1267  // to force the server to wait for a group commit in case journaling is
  1268  // enabled. The option has no effect if the server has journaling disabled.
  1269  //
  1270  // For example, the following statement will make the session check for
  1271  // errors, without imposing further constraints:
  1272  //
  1273  //     session.SetSafe(&mgo.Safe{})
  1274  //
  1275  // The following statement will force the server to wait for a majority of
  1276  // members of a replica set to return (MongoDB 2.0+ only):
  1277  //
  1278  //     session.SetSafe(&mgo.Safe{WMode: "majority"})
  1279  //
  1280  // The following statement, on the other hand, ensures that at least two
  1281  // servers have flushed the change to disk before confirming the success
  1282  // of operations:
  1283  //
  1284  //     session.EnsureSafe(&mgo.Safe{W: 2, FSync: true})
  1285  //
  1286  // The following statement, on the other hand, disables the verification
  1287  // of errors entirely:
  1288  //
  1289  //     session.SetSafe(nil)
  1290  //
  1291  // See also the EnsureSafe method.
  1292  //
  1293  // Relevant documentation:
  1294  //
  1295  //     http://www.mongodb.org/display/DOCS/getLastError+Command
  1296  //     http://www.mongodb.org/display/DOCS/Verifying+Propagation+of+Writes+with+getLastError
  1297  //     http://www.mongodb.org/display/DOCS/Data+Center+Awareness
  1298  //
  1299  func (s *Session) SetSafe(safe *Safe) {
  1300  	s.m.Lock()
  1301  	s.safeOp = nil
  1302  	s.ensureSafe(safe)
  1303  	s.m.Unlock()
  1304  }
  1305  
  1306  // EnsureSafe compares the provided safety parameters with the ones
  1307  // currently in use by the session and picks the most conservative
  1308  // choice for each setting.
  1309  //
  1310  // That is:
  1311  //
  1312  //     - safe.WMode is always used if set.
  1313  //     - safe.W is used if larger than the current W and WMode is empty.
  1314  //     - safe.FSync is always used if true.
  1315  //     - safe.J is used if FSync is false.
  1316  //     - safe.WTimeout is used if set and smaller than the current WTimeout.
  1317  //
  1318  // For example, the following statement will ensure the session is
  1319  // at least checking for errors, without enforcing further constraints.
  1320  // If a more conservative SetSafe or EnsureSafe call was previously done,
  1321  // the following call will be ignored.
  1322  //
  1323  //     session.EnsureSafe(&mgo.Safe{})
  1324  //
  1325  // See also the SetSafe method for details on what each option means.
  1326  //
  1327  // Relevant documentation:
  1328  //
  1329  //     http://www.mongodb.org/display/DOCS/getLastError+Command
  1330  //     http://www.mongodb.org/display/DOCS/Verifying+Propagation+of+Writes+with+getLastError
  1331  //     http://www.mongodb.org/display/DOCS/Data+Center+Awareness
  1332  //
  1333  func (s *Session) EnsureSafe(safe *Safe) {
  1334  	s.m.Lock()
  1335  	s.ensureSafe(safe)
  1336  	s.m.Unlock()
  1337  }
  1338  
  1339  func (s *Session) ensureSafe(safe *Safe) {
  1340  	if safe == nil {
  1341  		return
  1342  	}
  1343  
  1344  	var w interface{}
  1345  	if safe.WMode != "" {
  1346  		w = safe.WMode
  1347  	} else if safe.W > 0 {
  1348  		w = safe.W
  1349  	}
  1350  
  1351  	var cmd getLastError
  1352  	if s.safeOp == nil {
  1353  		cmd = getLastError{1, w, safe.WTimeout, safe.FSync, safe.J}
  1354  	} else {
  1355  		// Copy.  We don't want to mutate the existing query.
  1356  		cmd = *(s.safeOp.query.(*getLastError))
  1357  		if cmd.W == nil {
  1358  			cmd.W = w
  1359  		} else if safe.WMode != "" {
  1360  			cmd.W = safe.WMode
  1361  		} else if i, ok := cmd.W.(int); ok && safe.W > i {
  1362  			cmd.W = safe.W
  1363  		}
  1364  		if safe.WTimeout > 0 && safe.WTimeout < cmd.WTimeout {
  1365  			cmd.WTimeout = safe.WTimeout
  1366  		}
  1367  		if safe.FSync {
  1368  			cmd.FSync = true
  1369  			cmd.J = false
  1370  		} else if safe.J && !cmd.FSync {
  1371  			cmd.J = true
  1372  		}
  1373  	}
  1374  	s.safeOp = &queryOp{
  1375  		query:      &cmd,
  1376  		collection: "admin.$cmd",
  1377  		limit:      -1,
  1378  	}
  1379  }
  1380  
  1381  // Run issues the provided command against the "admin" database and
  1382  // unmarshals its result in the respective argument. The cmd
  1383  // argument may be either a string with the command name itself, in
  1384  // which case an empty document of the form bson.M{cmd: 1} will be used,
  1385  // or it may be a full command document.
  1386  //
  1387  // Note that MongoDB considers the first marshalled key as the command
  1388  // name, so when providing a command with options, it's important to
  1389  // use an ordering-preserving document, such as a struct value or an
  1390  // instance of bson.D.  For instance:
  1391  //
  1392  //     db.Run(bson.D{{"create", "mycollection"}, {"size", 1024}})
  1393  //
  1394  // For commands against arbitrary databases, see the Run method in
  1395  // the Database type.
  1396  //
  1397  // Relevant documentation:
  1398  //
  1399  //     http://www.mongodb.org/display/DOCS/Commands
  1400  //     http://www.mongodb.org/display/DOCS/List+of+Database+CommandSkips
  1401  //
  1402  func (s *Session) Run(cmd interface{}, result interface{}) error {
  1403  	return s.DB("admin").Run(cmd, result)
  1404  }
  1405  
  1406  // SelectServers restricts communication to servers configured with the
  1407  // given tags. For example, the following statement restricts servers
  1408  // used for reading operations to those with both tag "disk" set to
  1409  // "ssd" and tag "rack" set to 1:
  1410  //
  1411  //     session.SelectServers(bson.D{{"disk", "ssd"}, {"rack", 1}})
  1412  //
  1413  // Multiple sets of tags may be provided, in which case the used server
  1414  // must match all tags within any one set.
  1415  //
  1416  // If a connection was previously assigned to the session due to the
  1417  // current session mode (see Session.SetMode), the tag selection will
  1418  // only be enforced after the session is refreshed.
  1419  //
  1420  // Relevant documentation:
  1421  //
  1422  //     http://docs.mongodb.org/manual/tutorial/configure-replica-set-tag-sets
  1423  //
  1424  func (s *Session) SelectServers(tags ...bson.D) {
  1425  	s.m.Lock()
  1426  	s.queryConfig.op.serverTags = tags
  1427  	s.m.Unlock()
  1428  }
  1429  
  1430  // Ping runs a trivial ping command just to get in touch with the server.
  1431  func (s *Session) Ping() error {
  1432  	return s.Run("ping", nil)
  1433  }
  1434  
  1435  // Fsync flushes in-memory writes to disk on the server the session
  1436  // is established with. If async is true, the call returns immediately,
  1437  // otherwise it returns after the flush has been made.
  1438  func (s *Session) Fsync(async bool) error {
  1439  	return s.Run(bson.D{{"fsync", 1}, {"async", async}}, nil)
  1440  }
  1441  
  1442  // FsyncLock locks all writes in the specific server the session is
  1443  // established with and returns. Any writes attempted to the server
  1444  // after it is successfully locked will block until FsyncUnlock is
  1445  // called for the same server.
  1446  //
  1447  // This method works on slaves as well, preventing the oplog from being
  1448  // flushed while the server is locked, but since only the server
  1449  // connected to is locked, for locking specific slaves it may be
  1450  // necessary to establish a connection directly to the slave (see
  1451  // Dial's connect=direct option).
  1452  //
  1453  // As an important caveat, note that once a write is attempted and
  1454  // blocks, follow-up reads will block as well due to the way the
  1455  // lock is internally implemented in the server. More details at:
  1456  //
  1457  //     https://jira.mongodb.org/browse/SERVER-4243
  1458  //
  1459  // FsyncLock is often used for performing consistent backups of
  1460  // the database files on disk.
  1461  //
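        // For example, a simple consistent-backup sequence might look like the
        // sketch below, where copyFiles stands in for whatever mechanism actually
        // copies the database files:
        //
        //     if err := session.FsyncLock(); err != nil {
        //         return err
        //     }
        //     defer session.FsyncUnlock()
        //     return copyFiles()
        //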
  1462  // Relevant documentation:
  1463  //
  1464  //     http://www.mongodb.org/display/DOCS/fsync+Command
  1465  //     http://www.mongodb.org/display/DOCS/Backups
  1466  //
  1467  func (s *Session) FsyncLock() error {
  1468  	return s.Run(bson.D{{"fsync", 1}, {"lock", true}}, nil)
  1469  }
  1470  
  1471  // FsyncUnlock releases the server for writes. See FsyncLock for details.
  1472  func (s *Session) FsyncUnlock() error {
        	// The server performs the unlock when the virtual $cmd.sys.unlock collection is queried.
  1473  	return s.DB("admin").C("$cmd.sys.unlock").Find(nil).One(nil)
  1474  }
  1475  
  1476  // Find prepares a query using the provided document.  The document may be a
  1477  // map or a struct value capable of being marshalled with bson.  The map
  1478  // may be a generic one using interface{} for its key and/or values, such as
  1479  // bson.M, or it may be a properly typed map.  Providing nil as the document
  1480  // is equivalent to providing an empty document such as bson.M{}.
  1481  //
  1482  // Further details of the query may be tweaked using the resulting Query value,
  1483  // and then executed to retrieve results using methods such as One, For,
  1484  // Iter, or Tail.
  1485  //
  1486  // In case the resulting document includes a field named $err or errmsg, which
  1487  // are standard ways for MongoDB to return query errors, the returned err will
  1488  // be set to a *QueryError value including the Err message and the Code.  In
  1489  // those cases, the received document is still unmarshalled into the
  1490  // result argument so that any other custom values may be obtained if
  1491  // desired.
  1492  //
  1493  // Relevant documentation:
  1494  //
  1495  //     http://www.mongodb.org/display/DOCS/Querying
  1496  //     http://www.mongodb.org/display/DOCS/Advanced+Queries
  1497  //
  1498  func (c *Collection) Find(query interface{}) *Query {
  1499  	session := c.Database.Session
  1500  	session.m.RLock()
  1501  	q := &Query{session: session, query: session.queryConfig}
  1502  	session.m.RUnlock()
  1503  	q.op.query = query
  1504  	q.op.collection = c.FullName
  1505  	return q
  1506  }
  1507  
  1508  // FindId is a convenience helper equivalent to:
  1509  //
  1510  //     query := collection.Find(bson.M{"_id": id})
  1511  //
  1512  // See the Find method for more details.
  1513  func (c *Collection) FindId(id interface{}) *Query {
  1514  	return c.Find(bson.D{{"_id", id}})
  1515  }
  1516  
  1517  type Pipe struct {
  1518  	session    *Session
  1519  	collection *Collection
  1520  	pipeline   interface{}
  1521  }
  1522  
  1523  // Pipe prepares a pipeline to aggregate. The pipeline document
  1524  // must be a slice built in terms of the aggregation framework language.
  1525  //
  1526  // For example:
  1527  //
  1528  //     pipe := collection.Pipe([]bson.M{{"$match": bson.M{"name": "Otavio"}}})
  1529  //     iter := pipe.Iter()
  1530  //
  1531  // Relevant documentation:
  1532  //
  1533  //     http://docs.mongodb.org/manual/reference/aggregation
  1534  //     http://docs.mongodb.org/manual/applications/aggregation
  1535  //     http://docs.mongodb.org/manual/tutorial/aggregation-examples
  1536  //
  1537  func (c *Collection) Pipe(pipeline interface{}) *Pipe {
  1538  	session := c.Database.Session
  1539  	return &Pipe{
  1540  		session:    session,
  1541  		collection: c,
  1542  		pipeline:   pipeline,
  1543  	}
  1544  }
  1545  
  1546  // Iter executes the pipeline and returns an iterator capable of going
  1547  // over all the generated results.
  1548  func (p *Pipe) Iter() *Iter {
  1549  	iter := &Iter{
  1550  		session: p.session,
  1551  		timeout: -1,
  1552  	}
  1553  	iter.gotReply.L = &iter.m
  1554  	var result struct{ Result []bson.Raw }
  1555  	c := p.collection
  1556  	iter.err = c.Database.Run(bson.D{{"aggregate", c.Name}, {"pipeline", p.pipeline}}, &result)
  1557  	if iter.err != nil {
  1558  		return iter
  1559  	}
  1560  	for i := range result.Result {
  1561  		iter.docData.Push(result.Result[i].Data)
  1562  	}
  1563  	return iter
  1564  }
  1565  
  1566  // All works like Iter.All.
  1567  func (p *Pipe) All(result interface{}) error {
  1568  	return p.Iter().All(result)
  1569  }
  1570  
  1571  // One executes the pipeline and unmarshals the first item from the
  1572  // result set into the result parameter.
  1573  // It returns ErrNotFound if no items are generated by the pipeline.
  1574  func (p *Pipe) One(result interface{}) error {
  1575  	iter := p.Iter()
  1576  	if iter.Next(result) {
  1577  		return nil
  1578  	}
  1579  	if err := iter.Err(); err != nil {
  1580  		return err
  1581  	}
  1582  	return ErrNotFound
  1583  }
  1584  
  1585  type LastError struct {
  1586  	Err             string
  1587  	Code, N, Waited int
  1588  	FSyncFiles      int `bson:"fsyncFiles"`
  1589  	WTimeout        bool
  1590  	UpdatedExisting bool        `bson:"updatedExisting"`
  1591  	UpsertedId      interface{} `bson:"upserted"`
  1592  }
  1593  
  1594  func (err *LastError) Error() string {
  1595  	return err.Err
  1596  }
  1597  
  1598  type queryError struct {
  1599  	Err           string "$err"
  1600  	ErrMsg        string
  1601  	Assertion     string
  1602  	Code          int
  1603  	AssertionCode int        "assertionCode"
  1604  	LastError     *LastError "lastErrorObject"
  1605  }
  1606  
  1607  type QueryError struct {
  1608  	Code      int
  1609  	Message   string
  1610  	Assertion bool
  1611  }
  1612  
  1613  func (err *QueryError) Error() string {
  1614  	return err.Message
  1615  }
  1616  
  1617  // IsDup returns whether err informs of a duplicate key error because
  1618  // a primary key index or a secondary unique index already has an entry
  1619  // with the given value.
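        //
        // For example, a duplicate-aware insert might be written as follows
        // (doc here is just an illustrative placeholder):
        //
        //     err := collection.Insert(doc)
        //     if mgo.IsDup(err) {
        //         // The document already exists; handle the conflict as needed.
        //     }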
  1620  func IsDup(err error) bool {
  1621  	// Besides being handy, helps with https://jira.mongodb.org/browse/SERVER-7164
  1622  	// The duplicate key error codes differ across server versions and code paths, hence the repeated checks below.
  1623  	switch e := err.(type) {
  1624  	case *LastError:
  1625  		return e.Code == 11000 || e.Code == 11001 || e.Code == 12582
  1626  	case *QueryError:
  1627  		return e.Code == 11000 || e.Code == 11001 || e.Code == 12582
  1628  	}
  1629  	return false
  1630  }
  1631  
  1632  // Insert inserts one or more documents in the respective collection.  In
  1633  // case the session is in safe mode (see the SetSafe method) and an error
  1634  // happens while inserting the provided documents, the returned error will
  1635  // be of type *LastError.
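        //
        // For example, two documents may be inserted in a single call
        // (the field names below are illustrative only):
        //
        //     err := collection.Insert(bson.M{"name": "Ale"}, bson.M{"name": "Cla"})
        //     if err != nil {
        //         return err
        //     }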
  1636  func (c *Collection) Insert(docs ...interface{}) error {
  1637  	_, err := c.writeQuery(&insertOp{c.FullName, docs})
  1638  	return err
  1639  }
  1640  
  1641  // Update finds a single document matching the provided selector document
  1642  // and modifies it according to the change document.
  1643  // If the session is in safe mode (see SetSafe) an ErrNotFound error is
  1644  // returned if a document isn't found, or a value of type *LastError
  1645  // when some other error is detected.
  1646  //
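        // For example, a typical modifier-based update might be written as
        // follows (the selector and change documents are illustrative only):
        //
        //     err := collection.Update(bson.M{"_id": id}, bson.M{"$set": bson.M{"state": "done"}})
        //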
  1647  // Relevant documentation:
  1648  //
  1649  //     http://www.mongodb.org/display/DOCS/Updating
  1650  //     http://www.mongodb.org/display/DOCS/Atomic+Operations
  1651  //
  1652  func (c *Collection) Update(selector interface{}, change interface{}) error {
  1653  	lerr, err := c.writeQuery(&updateOp{c.FullName, selector, change, 0})
  1654  	if err == nil && lerr != nil && !lerr.UpdatedExisting {
  1655  		return ErrNotFound
  1656  	}
  1657  	return err
  1658  }
  1659  
  1660  // UpdateId is a convenience helper equivalent to:
  1661  //
  1662  //     err := collection.Update(bson.M{"_id": id}, change)
  1663  //
  1664  // See the Update method for more details.
  1665  func (c *Collection) UpdateId(id interface{}, change interface{}) error {
  1666  	return c.Update(bson.D{{"_id", id}}, change)
  1667  }
  1668  
  1669  // ChangeInfo holds details about the outcome of a change operation.
  1670  type ChangeInfo struct {
  1671  	Updated    int         // Number of existing documents updated
  1672  	Removed    int         // Number of documents removed
  1673  	UpsertedId interface{} // Upserted _id field, when not explicitly provided
  1674  }
  1675  
  1676  // UpdateAll finds all documents matching the provided selector document
  1677  // and modifies them according to the change document.
  1678  // If the session is in safe mode (see SetSafe) details of the executed
  1679  // operation are returned in info or an error of type *LastError when
  1680  // some problem is detected. It is not an error for the update to not be
  1681  // applied on any documents because the selector doesn't match.
  1682  //
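        // For example, the number of modified documents may be read from the
        // returned info (the selector and change documents are illustrative only):
        //
        //     info, err := collection.UpdateAll(bson.M{"state": "new"}, bson.M{"$set": bson.M{"state": "queued"}})
        //     if err == nil {
        //         fmt.Println("updated documents:", info.Updated)
        //     }
        //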
  1683  // Relevant documentation:
  1684  //
  1685  //     http://www.mongodb.org/display/DOCS/Updating
  1686  //     http://www.mongodb.org/display/DOCS/Atomic+Operations
  1687  //
  1688  func (c *Collection) UpdateAll(selector interface{}, change interface{}) (info *ChangeInfo, err error) {
  1689  	lerr, err := c.writeQuery(&updateOp{c.FullName, selector, change, 2})
  1690  	if err == nil && lerr != nil {
  1691  		info = &ChangeInfo{Updated: lerr.N}
  1692  	}
  1693  	return info, err
  1694  }
  1695  
  1696  // Upsert finds a single document matching the provided selector document
  1697  // and modifies it according to the change document.  If no document matching
  1698  // the selector is found, the change document is applied to the selector
  1699  // document and the result is inserted in the collection.
  1700  // If the session is in safe mode (see SetSafe) details of the executed
  1701  // operation are returned in info, or an error of type *LastError when
  1702  // some problem is detected.
  1703  //
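        // For example (the selector and change documents are illustrative only):
        //
        //     info, err := collection.Upsert(bson.M{"_id": id}, bson.M{"$inc": bson.M{"n": 1}})
        //     if err == nil && info.UpsertedId != nil {
        //         fmt.Println("inserted new document with id", info.UpsertedId)
        //     }
        //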
  1704  // Relevant documentation:
  1705  //
  1706  //     http://www.mongodb.org/display/DOCS/Updating
  1707  //     http://www.mongodb.org/display/DOCS/Atomic+Operations
  1708  //
  1709  func (c *Collection) Upsert(selector interface{}, change interface{}) (info *ChangeInfo, err error) {
  1710  	data, err := bson.Marshal(change)
  1711  	if err != nil {
  1712  		return nil, err
  1713  	}
  1714  	change = bson.Raw{0x03, data}
  1715  	lerr, err := c.writeQuery(&updateOp{c.FullName, selector, change, 1})
  1716  	if err == nil && lerr != nil {
  1717  		info = &ChangeInfo{}
  1718  		if lerr.UpdatedExisting {
  1719  			info.Updated = lerr.N
  1720  		} else {
  1721  			info.UpsertedId = lerr.UpsertedId
  1722  		}
  1723  	}
  1724  	return info, err
  1725  }
  1726  
  1727  // UpsertId is a convenience helper equivalent to:
  1728  //
  1729  //     info, err := collection.Upsert(bson.M{"_id": id}, change)
  1730  //
  1731  // See the Upsert method for more details.
  1732  func (c *Collection) UpsertId(id interface{}, change interface{}) (info *ChangeInfo, err error) {
  1733  	return c.Upsert(bson.D{{"_id", id}}, change)
  1734  }
  1735  
  1736  // Remove finds a single document matching the provided selector document
  1737  // and removes it from the database.
  1738  // If the session is in safe mode (see SetSafe) an ErrNotFound error is
  1739  // returned if a document isn't found, or a value of type *LastError
  1740  // when some other error is detected.
  1741  //
  1742  // Relevant documentation:
  1743  //
  1744  //     http://www.mongodb.org/display/DOCS/Removing
  1745  //
  1746  func (c *Collection) Remove(selector interface{}) error {
  1747  	lerr, err := c.writeQuery(&deleteOp{c.FullName, selector, 1})
  1748  	if err == nil && lerr != nil && lerr.N == 0 {
  1749  		return ErrNotFound
  1750  	}
  1751  	return err
  1752  }
  1753  
  1754  // RemoveId is a convenience helper equivalent to:
  1755  //
  1756  //     err := collection.Remove(bson.M{"_id": id})
  1757  //
  1758  // See the Remove method for more details.
  1759  func (c *Collection) RemoveId(id interface{}) error {
  1760  	return c.Remove(bson.D{{"_id", id}})
  1761  }
  1762  
  1763  // RemoveAll finds all documents matching the provided selector document
  1764  // and removes them from the database.  In case the session is in safe mode
  1765  // (see the SetSafe method) and an error happens when attempting the change,
  1766  // the returned error will be of type *LastError.
  1767  //
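        // For example (the selector below is illustrative only):
        //
        //     info, err := collection.RemoveAll(bson.M{"expired": true})
        //     if err == nil {
        //         fmt.Println("removed documents:", info.Removed)
        //     }
        //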
  1768  // Relevant documentation:
  1769  //
  1770  //     http://www.mongodb.org/display/DOCS/Removing
  1771  //
  1772  func (c *Collection) RemoveAll(selector interface{}) (info *ChangeInfo, err error) {
  1773  	lerr, err := c.writeQuery(&deleteOp{c.FullName, selector, 0})
  1774  	if err == nil && lerr != nil {
  1775  		info = &ChangeInfo{Removed: lerr.N}
  1776  	}
  1777  	return info, err
  1778  }
  1779  
  1780  // DropDatabase removes the entire database including all of its collections.
  1781  func (db *Database) DropDatabase() error {
  1782  	return db.Run(bson.D{{"dropDatabase", 1}}, nil)
  1783  }
  1784  
  1785  // DropCollection removes the entire collection including all of its documents.
  1786  func (c *Collection) DropCollection() error {
  1787  	return c.Database.Run(bson.D{{"drop", c.Name}}, nil)
  1788  }
  1789  
  1790  // The CollectionInfo type holds metadata about a collection.
  1791  //
  1792  // Relevant documentation:
  1793  //
  1794  //     http://www.mongodb.org/display/DOCS/createCollection+Command
  1795  //     http://www.mongodb.org/display/DOCS/Capped+Collections
  1796  //
  1797  type CollectionInfo struct {
  1798  	// DisableIdIndex prevents the automatic creation of the index
  1799  	// on the _id field for the collection.
  1800  	DisableIdIndex bool
  1801  
  1802  	// ForceIdIndex enforces the automatic creation of the index
  1803  	// on the _id field for the collection. Capped collections,
  1804  	// for example, do not have such an index by default.
  1805  	ForceIdIndex bool
  1806  
  1807  	// If Capped is true new documents will replace old ones when
  1808  	// the collection is full. MaxBytes must necessarily be set
  1809  	// to define the size when the collection wraps around.
  1810  	// MaxDocs optionally defines the number of documents when it
  1811  	// wraps, but MaxBytes still needs to be set.
  1812  	Capped   bool
  1813  	MaxBytes int
  1814  	MaxDocs  int
  1815  }
  1816  
  1817  // Create explicitly creates the c collection with details of info.
  1818  // MongoDB creates collections automatically on use, so this method
  1819  // is only necessary when creating collections with non-default
  1820  // characteristics, such as capped collections.
  1821  //
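        // For example, a capped collection of roughly one megabyte might be
        // created as follows (the size chosen here is arbitrary):
        //
        //     info := &mgo.CollectionInfo{Capped: true, MaxBytes: 1024 * 1024}
        //     err := collection.Create(info)
        //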
  1822  // Relevant documentation:
  1823  //
  1824  //     http://www.mongodb.org/display/DOCS/createCollection+Command
  1825  //     http://www.mongodb.org/display/DOCS/Capped+Collections
  1826  //
  1827  func (c *Collection) Create(info *CollectionInfo) error {
  1828  	cmd := make(bson.D, 0, 4)
  1829  	cmd = append(cmd, bson.DocElem{"create", c.Name})
  1830  	if info.Capped {
  1831  		if info.MaxBytes < 1 {
  1832  			return fmt.Errorf("Collection.Create: with Capped, MaxBytes must also be set")
  1833  		}
  1834  		cmd = append(cmd, bson.DocElem{"capped", true})
  1835  		cmd = append(cmd, bson.DocElem{"size", info.MaxBytes})
  1836  		if info.MaxDocs > 0 {
  1837  			cmd = append(cmd, bson.DocElem{"max", info.MaxDocs})
  1838  		}
  1839  	}
  1840  	if info.DisableIdIndex {
  1841  		cmd = append(cmd, bson.DocElem{"autoIndexId", false})
  1842  	}
  1843  	if info.ForceIdIndex {
  1844  		cmd = append(cmd, bson.DocElem{"autoIndexId", true})
  1845  	}
  1846  	return c.Database.Run(cmd, nil)
  1847  }
  1848  
  1849  // Batch sets the batch size used when fetching documents from the database.
  1850  // It's possible to change this setting on a per-session basis as well, using
  1851  // the Batch method of Session.
  1852  //
  1853  // The default batch size is defined by the database itself.  As of this
  1854  // writing, MongoDB will use an initial size of min(100 docs, 4MB) on the
  1855  // first batch, and 4MB on remaining ones.
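        //
        // For example, a larger batch size may reduce round trips when iterating
        // over a big result set (the value 500 below is arbitrary):
        //
        //     iter := collection.Find(nil).Batch(500).Iter()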
  1856  func (q *Query) Batch(n int) *Query {
  1857  	if n == 1 {
  1858  		// The server interprets a batch size of 1 as a limit of -1 and closes the cursor after the first batch, so use 2 instead.
  1859  		n = 2
  1860  	}
  1861  	q.m.Lock()
  1862  	q.op.limit = int32(n)
  1863  	q.m.Unlock()
  1864  	return q
  1865  }
  1866  
  1867  // Prefetch sets the point at which the next batch of results will be requested.
  1868  // When there are p*batch_size remaining documents cached in an Iter, the next
  1869  // batch will be requested in background. For instance, when using this:
  1870  //
  1871  //     query.Batch(200).Prefetch(0.25)
  1872  //
  1873  // and there are only 50 documents cached in the Iter to be processed, the
  1874  // next batch of 200 will be requested. It's possible to change this setting on
  1875  // a per-session basis as well, using the SetPrefetch method of Session.
  1876  //
  1877  // The default prefetch value is 0.25.
  1878  func (q *Query) Prefetch(p float64) *Query {
  1879  	q.m.Lock()
  1880  	q.prefetch = p
  1881  	q.m.Unlock()
  1882  	return q
  1883  }
  1884  
  1885  // Skip skips over the n initial documents from the query results.  Note that
  1886  // this only makes sense with capped collections where documents are naturally
  1887  // ordered by insertion time, or with sorted results.
  1888  func (q *Query) Skip(n int) *Query {
  1889  	q.m.Lock()
  1890  	q.op.skip = int32(n)
  1891  	q.m.Unlock()
  1892  	return q
  1893  }
  1894  
  1895  // Limit restricts the maximum number of documents retrieved to n, and also
  1896  // changes the batch size to the same value.  Once n documents have been
  1897  // returned by Next, the following call will return ErrNotFound.
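        //
        // For example, combined with Sort and Skip this allows simple paging
        // (the page size and sort field below are illustrative only):
        //
        //     var page []bson.M
        //     err := collection.Find(nil).Sort("_id").Skip(20).Limit(10).All(&page)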
  1898  func (q *Query) Limit(n int) *Query {
  1899  	q.m.Lock()
  1900  	switch {
  1901  	case n == 1:
  1902  		q.limit = 1
  1903  		q.op.limit = -1
  1904  	case n == math.MinInt32: // Negating MinInt32 overflows back to MinInt32, so handle it explicitly.
  1905  		q.limit = math.MaxInt32
  1906  		q.op.limit = math.MinInt32 + 1
  1907  	case n < 0:
  1908  		q.limit = int32(-n)
  1909  		q.op.limit = int32(n)
  1910  	default:
  1911  		q.limit = int32(n)
  1912  		q.op.limit = int32(n)
  1913  	}
  1914  	q.m.Unlock()
  1915  	return q
  1916  }
  1917  
  1918  // Select enables selecting which fields should be retrieved for the results
  1919  // found. For example, the following query would only retrieve the name field:
  1920  //
  1921  //     err := collection.Find(nil).Select(bson.M{"name": 1}).One(&result)
  1922  //
  1923  // Relevant documentation:
  1924  //
  1925  //     http://www.mongodb.org/display/DOCS/Retrieving+a+Subset+of+Fields
  1926  //
  1927  func (q *Query) Select(selector interface{}) *Query {
  1928  	q.m.Lock()
  1929  	q.op.selector = selector
  1930  	q.m.Unlock()
  1931  	return q
  1932  }
  1933  
  1934  // Sort asks the database to order returned documents according to the
  1935  // provided field names. A field name may be prefixed by - (minus) for
  1936  // it to be sorted in reverse order.
  1937  //
  1938  // For example:
  1939  //
  1940  //     query1 := collection.Find(nil).Sort("firstname", "lastname")
  1941  //     query2 := collection.Find(nil).Sort("-age")
  1942  //     query3 := collection.Find(nil).Sort("$natural")
  1943  //
  1944  // Relevant documentation:
  1945  //
  1946  //     http://www.mongodb.org/display/DOCS/Sorting+and+Natural+Order
  1947  //
  1948  func (q *Query) Sort(fields ...string) *Query {
  1949  	q.m.Lock()
  1950  	var order bson.D
  1951  	for _, field := range fields {
  1952  		n := 1
  1953  		if field != "" {
  1954  			switch field[0] {
  1955  			case '+':
  1956  				field = field[1:]
  1957  			case '-':
  1958  				n = -1
  1959  				field = field[1:]
  1960  			}
  1961  		}
  1962  		if field == "" {
  1963  			panic("Sort: empty field name")
  1964  		}
  1965  		order = append(order, bson.DocElem{field, n})
  1966  	}
  1967  	q.op.options.OrderBy = order
  1968  	q.op.hasOptions = true
  1969  	q.m.Unlock()
  1970  	return q
  1971  }
  1972  
  1973  // Explain returns a number of details about how the MongoDB server would
  1974  // execute the requested query, such as the number of objects examined,
  1975  // the number of times the read lock was yielded to allow writes to go in,
  1976  // and so on.
  1977  //
  1978  // For example:
  1979  //
  1980  //     m := bson.M{}
  1981  //     err := collection.Find(bson.M{"filename": name}).Explain(m)
  1982  //     if err == nil {
  1983  //         fmt.Printf("Explain: %#v\n", m)
  1984  //     }
  1985  //
  1986  // Relevant documentation:
  1987  //
  1988  //     http://www.mongodb.org/display/DOCS/Optimization
  1989  //     http://www.mongodb.org/display/DOCS/Query+Optimizer
  1990  //
  1991  func (q *Query) Explain(result interface{}) error {
  1992  	q.m.Lock()
  1993  	clone := &Query{session: q.session, query: q.query}
  1994  	q.m.Unlock()
  1995  	clone.op.options.Explain = true
  1996  	clone.op.hasOptions = true
  1997  	if clone.op.limit > 0 {
  1998  		clone.op.limit = -clone.op.limit
  1999  	}
  2000  	iter := clone.Iter()
  2001  	if iter.Next(result) {
  2002  		return nil
  2003  	}
  2004  	return iter.Close()
  2005  }
  2006  
  2007  // Hint will include an explicit "hint" in the query to force the server
  2008  // to use a specified index, potentially improving performance in some
  2009  // situations.  The provided parameters are the fields that compose the
  2010  // key of the index to be used.  For details on how the indexKey may be
  2011  // built, see the EnsureIndex method.
  2012  //
  2013  // For example:
  2014  //
  2015  //     query := collection.Find(bson.M{"firstname": "Joe", "lastname": "Winter"})
  2016  //     query.Hint("lastname", "firstname")
  2017  //
  2018  // Relevant documentation:
  2019  //
  2020  //     http://www.mongodb.org/display/DOCS/Optimization
  2021  //     http://www.mongodb.org/display/DOCS/Query+Optimizer
  2022  //
  2023  func (q *Query) Hint(indexKey ...string) *Query {
  2024  	q.m.Lock()
  2025  	_, realKey, err := parseIndexKey(indexKey)
  2026  	q.op.options.Hint = realKey
  2027  	q.op.hasOptions = true
  2028  	q.m.Unlock()
  2029  	if err != nil {
  2030  		panic(err)
  2031  	}
  2032  	return q
  2033  }
  2034  
  2035  // Snapshot will force the performed query to make use of an available
  2036  // index on the _id field to prevent the same document from being returned
  2037  // more than once in a single iteration. This might happen without this
  2038  // setting in situations when the document changes in size and thus has to
  2039  // be moved while the iteration is running.
  2040  //
  2041  // Because snapshot mode traverses the _id index, it may not be used with
  2042  // sorting or explicit hints. It also cannot use any other index for the
  2043  // query.
  2044  //
  2045  // Even with snapshot mode, items inserted or deleted during the query may
  2046  // or may not be returned; that is, this mode is not a true point-in-time
  2047  // snapshot.
  2048  //
  2049  // The same effect of Snapshot may be obtained by using any unique index on
  2050  // field(s) that will not be modified (best to use Hint explicitly too).
  2051  // A non-unique index (such as creation time) may be made unique by
  2052  // appending _id to the index when creating it.
  2053  //
  2054  // Relevant documentation:
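        // For example, a full-collection scan that should not observe the same
        // document twice may enable snapshot mode like this:
        //
        //     iter := collection.Find(nil).Snapshot().Iter()
        //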
  2055  //
  2056  //     http://www.mongodb.org/display/DOCS/How+to+do+Snapshotted+Queries+in+the+Mongo+Database
  2057  //
  2058  func (q *Query) Snapshot() *Query {
  2059  	q.m.Lock()
  2060  	q.op.options.Snapshot = true
  2061  	q.op.hasOptions = true
  2062  	q.m.Unlock()
  2063  	return q
  2064  }
  2065  
  2066  // LogReplay enables an option that optimizes queries that are typically
  2067  // made against the MongoDB oplog for replaying it. This is an internal
  2068  // implementation aspect and most likely uninteresting for other uses.
  2069  // It has seen at least one use case, though, so it's exposed via the API.
  2070  func (q *Query) LogReplay() *Query {
  2071  	q.m.Lock()
  2072  	q.op.flags |= flagLogReplay
  2073  	q.m.Unlock()
  2074  	return q
  2075  }
  2076  
  2077  func checkQueryError(fullname string, d []byte) error {
  2078  	l := len(d)
  2079  	if l < 16 {
  2080  		return nil
  2081  	}
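        	// A BSON document begins with a 4-byte length prefix, so d[4] holds the
        	// type tag of the first element and d[5:] its key. Type 0x02 is a UTF-8
        	// string, so the test below matches a document whose first field is a
        	// string named "$err", the way plain query errors are reported.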
  2082  	if d[5] == '$' && d[6] == 'e' && d[7] == 'r' && d[8] == 'r' && d[9] == '\x00' && d[4] == '\x02' {
  2083  		goto Error
  2084  	}
  2085  	if len(fullname) < 5 || fullname[len(fullname)-5:] != ".$cmd" {
  2086  		return nil
  2087  	}
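        	// For command results (namespaces ending in ".$cmd"), scan the raw bytes
        	// for a string element (type 0x02) keyed "errmsg", the way command
        	// failures are reported.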
  2088  	for i := 0; i+8 < l; i++ {
  2089  		if d[i] == '\x02' && d[i+1] == 'e' && d[i+2] == 'r' && d[i+3] == 'r' && d[i+4] == 'm' && d[i+5] == 's' && d[i+6] == 'g' && d[i+7] == '\x00' {
  2090  			goto Error
  2091  		}
  2092  	}
  2093  	return nil
  2094  
  2095  Error:
  2096  	result := &queryError{}
  2097  	bson.Unmarshal(d, result)
  2098  	logf("queryError: %#v\n", result)
  2099  	if result.LastError != nil {
  2100  		return result.LastError
  2101  	}
  2102  	if result.Err == "" && result.ErrMsg == "" {
  2103  		return nil
  2104  	}
  2105  	if result.AssertionCode != 0 && result.Assertion != "" {
  2106  		return &QueryError{Code: result.AssertionCode, Message: result.Assertion, Assertion: true}
  2107  	}
  2108  	if result.Err != "" {
  2109  		return &QueryError{Code: result.Code, Message: result.Err}
  2110  	}
  2111  	return &QueryError{Code: result.Code, Message: result.ErrMsg}
  2112  }
  2113  
  2114  // One executes the query and unmarshals the first obtained document into the
  2115  // result argument.  The result must be a struct or map value capable of being
  2116  // unmarshalled into by gobson.  This function blocks until either a result
  2117  // is available or an error happens.  For example:
  2118  //
  2119  //     err := collection.Find(bson.M{"a": 1}).One(&result)
  2120  //
  2121  // In case the resulting document includes a field named $err or errmsg, which
  2122  // are standard ways for MongoDB to return query errors, the returned err will
  2123  // be set to a *QueryError value including the Err message and the Code.  In
  2124  // those cases, the received document is still unmarshalled into the
  2125  // result argument so that any other custom values may be obtained if
  2126  // desired.
  2127  //
  2128  func (q *Query) One(result interface{}) (err error) {
  2129  	q.m.Lock()
  2130  	session := q.session
  2131  	op := q.op // Copy.
  2132  	q.m.Unlock()
  2133  
  2134  	socket, err := session.acquireSocket(true)
  2135  	if err != nil {
  2136  		return err
  2137  	}
  2138  	defer socket.Release()
  2139  
  2140  	op.flags |= session.slaveOkFlag()
  2141  	op.limit = -1
  2142  
  2143  	data, err := socket.SimpleQuery(&op)
  2144  	if err != nil {
  2145  		return err
  2146  	}
  2147  	if data == nil {
  2148  		return ErrNotFound
  2149  	}
  2150  	if result != nil {
  2151  		err = bson.Unmarshal(data, result)
  2152  		if err == nil {
  2153  			debugf("Query %p document unmarshaled: %#v", q, result)
  2154  		} else {
  2155  			debugf("Query %p document unmarshaling failed: %#v", q, err)
  2156  			return err
  2157  		}
  2158  	}
  2159  	return checkQueryError(op.collection, data)
  2160  }
  2161  
  2162  // The DBRef type implements support for the database reference MongoDB
  2163  // convention as supported by multiple drivers.  This convention enables
  2164  // cross-referencing documents between collections and databases using
  2165  // a structure which includes a collection name, a document id, and
  2166  // optionally a database name.
  2167  //
  2168  // See the FindRef methods on Session and on Database.
  2169  //
  2170  // Relevant documentation:
  2171  //
  2172  //     http://www.mongodb.org/display/DOCS/Database+References
  2173  //
  2174  type DBRef struct {
  2175  	Collection string      `bson:"$ref"`
  2176  	Id         interface{} `bson:"$id"`
  2177  	Database   string      `bson:"$db,omitempty"`
  2178  }
  2179  
  2180  // NOTE: Order of fields for DBRef above does matter, per documentation.
  2181  
  2182  // FindRef returns a query that looks for the document in the provided
  2183  // reference. If the reference includes the DB field, the document will
  2184  // be retrieved from the respective database.
  2185  //
  2186  // See also the DBRef type and the FindRef method on Session.
  2187  //
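        // For example, assuming a loaded document carries a DBRef in a field
        // named Owner, the referenced document might be fetched like this
        // (the Owner field and owner variable are illustrative only):
        //
        //     err := db.FindRef(&doc.Owner).One(&owner)
        //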
  2188  // Relevant documentation:
  2189  //
  2190  //     http://www.mongodb.org/display/DOCS/Database+References
  2191  //
  2192  func (db *Database) FindRef(ref *DBRef) *Query {
  2193  	var c *Collection
  2194  	if ref.Database == "" {
  2195  		c = db.C(ref.Collection)
  2196  	} else {
  2197  		c = db.Session.DB(ref.Database).C(ref.Collection)
  2198  	}
  2199  	return c.FindId(ref.Id)
  2200  }
  2201  
  2202  // FindRef returns a query that looks for the document in the provided
  2203  // reference. For a DBRef to be resolved correctly at the session level
  2204  // it must necessarily have the optional DB field defined.
  2205  //
  2206  // See also the DBRef type and the FindRef method on Database.
  2207  //
  2208  // Relevant documentation:
  2209  //
  2210  //     http://www.mongodb.org/display/DOCS/Database+References
  2211  //
  2212  func (s *Session) FindRef(ref *DBRef) *Query {
  2213  	if ref.Database == "" {
  2214  		panic(fmt.Errorf("Can't resolve database for %#v", ref))
  2215  	}
  2216  	c := s.DB(ref.Database).C(ref.Collection)
  2217  	return c.FindId(ref.Id)
  2218  }
  2219  
  2220  // CollectionNames returns the collection names present in database.
  2221  func (db *Database) CollectionNames() (names []string, err error) {
  2222  	c := len(db.Name) + 1
  2223  	iter := db.C("system.namespaces").Find(nil).Iter()
  2224  	var result *struct{ Name string }
  2225  	for iter.Next(&result) {
  2226  		if !strings.Contains(result.Name, "$") || strings.Contains(result.Name, ".oplog.$") {
  2227  			names = append(names, result.Name[c:])
  2228  		}
  2229  	}
  2230  	if err := iter.Close(); err != nil {
  2231  		return nil, err
  2232  	}
  2233  	sort.Strings(names)
  2234  	return names, nil
  2235  }
  2236  
  2237  type dbNames struct {
  2238  	Databases []struct {
  2239  		Name  string
  2240  		Empty bool
  2241  	}
  2242  }
  2243  
  2244  // DatabaseNames returns the names of non-empty databases present in the cluster.
  2245  func (s *Session) DatabaseNames() (names []string, err error) {
  2246  	var result dbNames
  2247  	err = s.Run("listDatabases", &result)
  2248  	if err != nil {
  2249  		return nil, err
  2250  	}
  2251  	for _, db := range result.Databases {
  2252  		if !db.Empty {
  2253  			names = append(names, db.Name)
  2254  		}
  2255  	}
  2256  	sort.Strings(names)
  2257  	return names, nil
  2258  }
  2259  
  2260  // Iter executes the query and returns an iterator capable of going over all
  2261  // the results. Results will be returned in batches of configurable
  2262  // size (see the Batch method) and more documents will be requested when a
  2263  // configurable number of documents is iterated over (see the Prefetch method).
  2264  func (q *Query) Iter() *Iter {
  2265  	q.m.Lock()
  2266  	session := q.session
  2267  	op := q.op
  2268  	prefetch := q.prefetch
  2269  	limit := q.limit
  2270  	q.m.Unlock()
  2271  
  2272  	iter := &Iter{
  2273  		session:  session,
  2274  		prefetch: prefetch,
  2275  		limit:    limit,
  2276  		timeout:  -1,
  2277  	}
  2278  	iter.gotReply.L = &iter.m
  2279  	iter.op.collection = op.collection
  2280  	iter.op.limit = op.limit
  2281  	iter.op.replyFunc = iter.replyFunc()
  2282  	iter.docsToReceive++
  2283  	op.replyFunc = iter.op.replyFunc
  2284  	op.flags |= session.slaveOkFlag()
  2285  
  2286  	socket, err := session.acquireSocket(true)
  2287  	if err != nil {
  2288  		iter.err = err
  2289  	} else {
  2290  		iter.err = socket.Query(&op)
  2291  		iter.server = socket.Server()
  2292  		socket.Release()
  2293  	}
  2294  	return iter
  2295  }
  2296  
  2297  // Tail returns a tailable iterator. Unlike a normal iterator, a
  2298  // tailable iterator may wait for new values to be inserted in the
  2299  // collection once the end of the current result set is reached.
  2300  // A tailable iterator may only be used with capped collections.
  2301  //
  2302  // The timeout parameter indicates how long Next will block waiting
  2303  // for a result before timing out.  If set to -1, Next will not
  2304  // timeout, and will continue waiting for a result for as long as
  2305  // the cursor is valid and the session is not closed. If set to 0,
  2306  // Next times out as soon as it reaches the end of the result set.
  2307  // Otherwise, Next will wait for at least the given duration for
  2308  // a new document to be available before timing out.
  2309  //
  2310  // On timeouts, Next will unblock and return false, and the Timeout
  2311  // method will return true if called. In these cases, Next may still
  2312  // be called again on the same iterator to check if a new value is
  2313  // available at the current cursor position, and again it will block
  2314  // according to the specified timeout. If the cursor becomes
  2315  // invalid, though, both Next and Timeout will return false and
  2316  // the query must be restarted.
  2317  //
  2318  // The following example demonstrates timeout handling and query
  2319  // restarting:
  2320  //
  2321  //    iter := collection.Find(nil).Sort("$natural").Tail(5 * time.Second)
  2322  //    for {
  2323  //         for iter.Next(&result) {
  2324  //             fmt.Println(result.Id)
  2325  //             lastId = result.Id
  2326  //         }
  2327  //         if err := iter.Close(); err != nil {
  2328  //             return err
  2329  //         }
  2330  //         if iter.Timeout() {
  2331  //             continue
  2332  //         }
  2333  //         query := collection.Find(bson.M{"_id": bson.M{"$gt": lastId}})
  2334  //         iter = query.Sort("$natural").Tail(5 * time.Second)
  2335  //    }
  2336  //
  2337  // Relevant documentation:
  2338  //
  2339  //     http://www.mongodb.org/display/DOCS/Tailable+Cursors
  2340  //     http://www.mongodb.org/display/DOCS/Capped+Collections
  2341  //     http://www.mongodb.org/display/DOCS/Sorting+and+Natural+Order
  2342  //
  2343  func (q *Query) Tail(timeout time.Duration) *Iter {
  2344  	q.m.Lock()
  2345  	session := q.session
  2346  	op := q.op
  2347  	prefetch := q.prefetch
  2348  	q.m.Unlock()
  2349  
  2350  	iter := &Iter{session: session, prefetch: prefetch}
  2351  	iter.gotReply.L = &iter.m
  2352  	iter.timeout = timeout
  2353  	iter.op.collection = op.collection
  2354  	iter.op.limit = op.limit
  2355  	iter.op.replyFunc = iter.replyFunc()
  2356  	iter.docsToReceive++
  2357  	op.replyFunc = iter.op.replyFunc
  2358  	op.flags |= flagTailable | flagAwaitData | session.slaveOkFlag()
  2359  
  2360  	socket, err := session.acquireSocket(true)
  2361  	if err != nil {
  2362  		iter.err = err
  2363  	} else {
  2364  		iter.err = socket.Query(&op)
  2365  		iter.server = socket.Server()
  2366  		socket.Release()
  2367  	}
  2368  	return iter
  2369  }
  2370  
  2371  func (s *Session) slaveOkFlag() (flag queryOpFlags) {
  2372  	s.m.RLock()
  2373  	if s.slaveOk {
  2374  		flag = flagSlaveOk
  2375  	}
  2376  	s.m.RUnlock()
  2377  	return
  2378  }
  2379  
  2380  // Err returns nil if no errors happened during iteration, or the actual
  2381  // error otherwise.
  2382  //
  2383  // In case a resulting document included a field named $err or errmsg, which are
  2384  // standard ways for MongoDB to report an improper query, the returned value has
  2385  // a *QueryError type, and includes the Err message and the Code.
  2386  func (iter *Iter) Err() error {
  2387  	iter.m.Lock()
  2388  	err := iter.err
  2389  	iter.m.Unlock()
  2390  	if err == ErrNotFound {
  2391  		return nil
  2392  	}
  2393  	return err
  2394  }
  2395  
  2396  // Close kills the server cursor used by the iterator, if any, and returns
  2397  // nil if no errors happened during iteration, or the actual error otherwise.
  2398  //
  2399  // Server cursors are automatically closed at the end of an iteration, which
  2400  // means close will do nothing unless the iteration was interrupted before
  2401  // the server finished sending results to the driver. If Close is not called
  2402  // in such a situation, the cursor will remain available at the server until
  2403  // the default cursor timeout period is reached. No further problems arise.
  2404  //
  2405  // Close is idempotent. That means it can be called repeatedly and will
  2406  // return the same result every time.
  2407  //
  2408  // In case a resulting document included a field named $err or errmsg, which are
  2409  // standard ways for MongoDB to report an improper query, the returned value has
  2410  // a *QueryError type.
  2411  func (iter *Iter) Close() error {
  2412  	iter.m.Lock()
  2413  	iter.killCursor()
  2414  	err := iter.err
  2415  	iter.m.Unlock()
  2416  	if err == ErrNotFound {
  2417  		return nil
  2418  	}
  2419  	return err
  2420  }
  2421  
  2422  func (iter *Iter) killCursor() error {
  2423  	if iter.op.cursorId != 0 {
  2424  		socket, err := iter.acquireSocket()
  2425  		if err == nil {
  2426  			// TODO Batch kills.
  2427  			err = socket.Query(&killCursorsOp{[]int64{iter.op.cursorId}})
  2428  			socket.Release()
  2429  		}
  2430  		if err != nil && (iter.err == nil || iter.err == ErrNotFound) {
  2431  			iter.err = err
  2432  		}
  2433  		iter.op.cursorId = 0
  2434  		return err
  2435  	}
  2436  	return nil
  2437  }
  2438  
  2439  // Timeout returns true if Next returned false due to a timeout of
  2440  // a tailable cursor. In those cases, Next may be called again to continue
  2441  // the iteration at the previous cursor position.
  2442  func (iter *Iter) Timeout() bool {
  2443  	iter.m.Lock()
  2444  	result := iter.timedout
  2445  	iter.m.Unlock()
  2446  	return result
  2447  }
  2448  
  2449  // Next retrieves the next document from the result set, blocking if necessary.
  2450  // This method will also automatically retrieve another batch of documents from
  2451  // the server when the current one is exhausted, or before that in background
  2452  // if pre-fetching is enabled (see the Query.Prefetch and Session.SetPrefetch
  2453  // methods).
  2454  //
  2455  // Next returns true if a document was successfully unmarshalled into result,
  2456  // and false at the end of the result set or if an error happened.
  2457  // When Next returns false, the Err method should be called to verify if
  2458  // there was an error during iteration.
  2459  //
  2460  // For example:
  2461  //
  2462  //    iter := collection.Find(nil).Iter()
  2463  //    for iter.Next(&result) {
  2464  //        fmt.Printf("Result: %v\n", result.Id)
  2465  //    }
  2466  //    if err := iter.Close(); err != nil {
  2467  //        return err
  2468  //    }
  2469  //
  2470  func (iter *Iter) Next(result interface{}) bool {
  2471  	iter.m.Lock()
  2472  	iter.timedout = false
  2473  	timeout := time.Time{}
  2474  	for iter.err == nil && iter.docData.Len() == 0 && (iter.docsToReceive > 0 || iter.op.cursorId != 0) {
  2475  		if iter.docsToReceive == 0 {
  2476  			if iter.timeout >= 0 {
  2477  				if timeout.IsZero() {
  2478  					timeout = time.Now().Add(iter.timeout)
  2479  				}
  2480  				if time.Now().After(timeout) {
  2481  					iter.timedout = true
  2482  					iter.m.Unlock()
  2483  					return false
  2484  				}
  2485  			}
  2486  			iter.getMore()
  2487  		}
  2488  		iter.gotReply.Wait()
  2489  	}
  2490  
  2491  	// Exhaust available data before reporting any errors.
  2492  	if docData, ok := iter.docData.Pop().([]byte); ok {
  2493  		if iter.limit > 0 {
  2494  			iter.limit--
  2495  			if iter.limit == 0 {
  2496  				if iter.docData.Len() > 0 {
  2497  					panic(fmt.Errorf("data remains after limit exhausted: %d", iter.docData.Len()))
  2498  				}
  2499  				iter.err = ErrNotFound
  2500  				if iter.killCursor() != nil {
  2501  					return false
  2502  				}
  2503  			}
  2504  		}
  2505  		if iter.op.cursorId != 0 && iter.err == nil {
  2506  			if iter.docsBeforeMore == 0 {
  2507  				iter.getMore()
  2508  			}
  2509  			iter.docsBeforeMore-- // Goes negative.
  2510  		}
  2511  		iter.m.Unlock()
  2512  		err := bson.Unmarshal(docData, result)
  2513  		if err != nil {
  2514  			debugf("Iter %p document unmarshaling failed: %#v", iter, err)
  2515  			// Record the error while holding the iterator mutex, matching
        			// the handling of query errors below.
        			iter.m.Lock()
        			if iter.err == nil {
        				iter.err = err
        			}
        			iter.m.Unlock()
  2516  			return false
  2517  		}
  2518  		debugf("Iter %p document unmarshaled: %#v", iter, result)
  2519  		// XXX Only have to check first document for a query error?
  2520  		err = checkQueryError(iter.op.collection, docData)
  2521  		if err != nil {
  2522  			iter.m.Lock()
  2523  			if iter.err == nil {
  2524  				iter.err = err
  2525  			}
  2526  			iter.m.Unlock()
  2527  			return false
  2528  		}
  2529  		return true
  2530  	} else if iter.err != nil {
  2531  		debugf("Iter %p returning false: %s", iter, iter.err)
  2532  		iter.m.Unlock()
  2533  		return false
  2534  	} else if iter.op.cursorId == 0 {
  2535  		iter.err = ErrNotFound
  2536  		debugf("Iter %p exhausted with cursor=0", iter)
  2537  		iter.m.Unlock()
  2538  		return false
  2539  	}
  2540  
  2541  	panic("unreachable")
  2542  }
  2543  
  2544  // All retrieves all documents from the result set into the provided slice
  2545  // and closes the iterator.
  2546  //
  2547  // The result argument must necessarily be the address for a slice. The slice
  2548  // may be nil or previously allocated.
  2549  //
  2550  // WARNING: All must not be used with result sets that may be large,
  2551  // since it may consume all available memory until the system
  2552  // crashes. Consider building the query with a Limit clause to ensure the
  2553  // result size is bounded.
  2554  //
  2555  // For instance:
  2556  //
  2557  //    var result []struct{ Value int }
  2558  //    iter := collection.Find(nil).Limit(100).Iter()
  2559  //    err := iter.All(&result)
  2560  //    if err != nil {
  2561  //        return err
  2562  //    }
  2563  //
  2564  func (iter *Iter) All(result interface{}) error {
  2565  	resultv := reflect.ValueOf(result)
  2566  	if resultv.Kind() != reflect.Ptr || resultv.Elem().Kind() != reflect.Slice {
  2567  		panic("result argument must be a slice address")
  2568  	}
  2569  	slicev := resultv.Elem()
  2570  	slicev = slicev.Slice(0, slicev.Cap())
  2571  	elemt := slicev.Type().Elem()
  2572  	i := 0
  2573  	for {
  2574  		if slicev.Len() == i {
  2575  			elemp := reflect.New(elemt)
  2576  			if !iter.Next(elemp.Interface()) {
  2577  				break
  2578  			}
  2579  			slicev = reflect.Append(slicev, elemp.Elem())
  2580  			slicev = slicev.Slice(0, slicev.Cap())
  2581  		} else {
  2582  			if !iter.Next(slicev.Index(i).Addr().Interface()) {
  2583  				break
  2584  			}
  2585  		}
  2586  		i++
  2587  	}
  2588  	resultv.Elem().Set(slicev.Slice(0, i))
  2589  	return iter.Close()
  2590  }
  2591  
  2592  // All works like Iter.All.
  2593  func (q *Query) All(result interface{}) error {
  2594  	return q.Iter().All(result)
  2595  }
  2596  
  2597  // The For method is obsolete and will be removed in a future release.
  2598  // See Iter as an elegant replacement.
  2599  func (q *Query) For(result interface{}, f func() error) error {
  2600  	return q.Iter().For(result, f)
  2601  }
  2602  
  2603  // The For method is obsolete and will be removed in a future release.
  2604  // See Iter as an elegant replacement.
  2605  func (iter *Iter) For(result interface{}, f func() error) (err error) {
  2606  	valid := false
  2607  	v := reflect.ValueOf(result)
  2608  	if v.Kind() == reflect.Ptr {
  2609  		v = v.Elem()
  2610  		switch v.Kind() {
  2611  		case reflect.Map, reflect.Ptr, reflect.Interface, reflect.Slice:
  2612  			valid = v.IsNil()
  2613  		}
  2614  	}
  2615  	if !valid {
  2616  		panic("For needs a pointer to nil reference value.  See the documentation.")
  2617  	}
  2618  	zero := reflect.Zero(v.Type())
  2619  	for {
  2620  		v.Set(zero)
  2621  		if !iter.Next(result) {
  2622  			break
  2623  		}
  2624  		err = f()
  2625  		if err != nil {
  2626  			return err
  2627  		}
  2628  	}
  2629  	return iter.Err()
  2630  }
  2631  
  2632  func (iter *Iter) acquireSocket() (*mongoSocket, error) {
  2633  	socket, err := iter.session.acquireSocket(true)
  2634  	if err != nil {
  2635  		return nil, err
  2636  	}
  2637  	if socket.Server() != iter.server {
  2638  		// Socket server changed during iteration. This may happen
  2639  		// with Eventual sessions, if a Refresh is done, or if a
  2640  		// monotonic session gets a write and shifts from secondary
  2641  		// to primary. Our cursor is in a specific server, though.
  2642  		iter.session.m.Lock()
  2643  		sockTimeout := iter.session.sockTimeout
  2644  		iter.session.m.Unlock()
  2645  		socket.Release()
  2646  		socket, _, err = iter.server.AcquireSocket(0, sockTimeout)
  2647  		if err != nil {
  2648  			return nil, err
  2649  		}
  2650  		err := iter.session.socketLogin(socket)
  2651  		if err != nil {
  2652  			socket.Release()
  2653  			return nil, err
  2654  		}
  2655  	}
  2656  	return socket, nil
  2657  }
  2658  
  2659  func (iter *Iter) getMore() {
  2660  	socket, err := iter.acquireSocket()
  2661  	if err != nil {
  2662  		iter.err = err
  2663  		return
  2664  	}
  2665  	defer socket.Release()
  2666  
  2667  	debugf("Iter %p requesting more documents", iter)
  2668  	if iter.limit > 0 {
  2669  		limit := iter.limit - int32(iter.docsToReceive) - int32(iter.docData.Len())
  2670  		if limit < iter.op.limit {
  2671  			iter.op.limit = limit
  2672  		}
  2673  	}
  2674  	if err := socket.Query(&iter.op); err != nil {
  2675  		iter.err = err
  2676  	}
  2677  	iter.docsToReceive++
  2678  }
  2679  
  2680  type countCmd struct {
  2681  	Count string
  2682  	Query interface{}
  2683  	Limit int32 ",omitempty"
  2684  	Skip  int32 ",omitempty"
  2685  }
  2686  
  2687  // Count returns the total number of documents in the result set.
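        //
        // For example (the selector below is illustrative only):
        //
        //     n, err := collection.Find(bson.M{"state": "done"}).Count()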
  2688  func (q *Query) Count() (n int, err error) {
  2689  	q.m.Lock()
  2690  	session := q.session
  2691  	op := q.op
  2692  	limit := q.limit
  2693  	q.m.Unlock()
  2694  
  2695  	c := strings.Index(op.collection, ".")
  2696  	if c < 0 {
  2697  		return 0, errors.New("Bad collection name: " + op.collection)
  2698  	}
  2699  
  2700  	dbname := op.collection[:c]
  2701  	cname := op.collection[c+1:]
  2702  
  2703  	result := struct{ N int }{}
  2704  	err = session.DB(dbname).Run(countCmd{cname, op.query, limit, op.skip}, &result)
  2705  	return result.N, err
  2706  }
  2707  
  2708  // Count returns the total number of documents in the collection.
  2709  func (c *Collection) Count() (n int, err error) {
  2710  	return c.Find(nil).Count()
  2711  }
  2712  
  2713  type distinctCmd struct {
  2714  	Collection string "distinct"
  2715  	Key        string
  2716  	Query      interface{} ",omitempty"
  2717  }
  2718  
  2719  // Distinct returns a list of distinct values for the given key within
  2720  // the result set.  The list of distinct values will be unmarshalled
  2721  // in the "values" key of the provided result parameter.
  2722  //
  2723  // For example:
  2724  //
  2725  //     var result []int
  2726  //     err := collection.Find(bson.M{"gender": "F"}).Distinct("age", &result)
  2727  //
  2728  // Relevant documentation:
  2729  //
  2730  //     http://www.mongodb.org/display/DOCS/Aggregation
  2731  //
  2732  func (q *Query) Distinct(key string, result interface{}) error {
  2733  	q.m.Lock()
  2734  	session := q.session
  2735  	op := q.op // Copy.
  2736  	q.m.Unlock()
  2737  
  2738  	c := strings.Index(op.collection, ".")
  2739  	if c < 0 {
  2740  		return errors.New("Bad collection name: " + op.collection)
  2741  	}
  2742  
  2743  	dbname := op.collection[:c]
  2744  	cname := op.collection[c+1:]
  2745  
  2746  	var doc struct{ Values bson.Raw }
  2747  	err := session.DB(dbname).Run(distinctCmd{cname, key, op.query}, &doc)
  2748  	if err != nil {
  2749  		return err
  2750  	}
  2751  	return doc.Values.Unmarshal(result)
  2752  }
  2753  
  2754  type mapReduceCmd struct {
  2755  	Collection string "mapreduce"
  2756  	Map        string ",omitempty"
  2757  	Reduce     string ",omitempty"
  2758  	Finalize   string ",omitempty"
  2759  	Limit      int32  ",omitempty"
  2760  	Out        interface{}
  2761  	Query      interface{} ",omitempty"
  2762  	Sort       interface{} ",omitempty"
  2763  	Scope      interface{} ",omitempty"
  2764  	Verbose    bool        ",omitempty"
  2765  }
  2766  
  2767  type mapReduceResult struct {
  2768  	Results    bson.Raw
  2769  	Result     bson.Raw
  2770  	TimeMillis int64 "timeMillis"
  2771  	Counts     struct{ Input, Emit, Output int }
  2772  	Ok         bool
  2773  	Err        string
  2774  	Timing     *MapReduceTime
  2775  }
  2776  
  2777  type MapReduce struct {
  2778  	Map      string      // Map Javascript function code (required)
  2779  	Reduce   string      // Reduce Javascript function code (required)
  2780  	Finalize string      // Finalize Javascript function code (optional)
  2781  	Out      interface{} // Output collection name or document. If nil, results are inlined into the result parameter.
  2782  	Scope    interface{} // Optional global scope for Javascript functions
  2783  	Verbose  bool
  2784  }
  2785  
  2786  type MapReduceInfo struct {
  2787  	InputCount  int            // Number of documents mapped
  2788  	EmitCount   int            // Number of times reduce called emit
  2789  	OutputCount int            // Number of documents in resulting collection
  2790  	Database    string         // Output database, if results are not inlined
  2791  	Collection  string         // Output collection, if results are not inlined
  2792  	Time        int64          // Time to run the job, in nanoseconds
  2793  	VerboseTime *MapReduceTime // Only defined if Verbose was true
  2794  }
  2795  
  2796  type MapReduceTime struct {
  2797  	Total    int64 // Total time, in nanoseconds
  2798  	Map      int64 "mapTime"  // Time within map function, in nanoseconds
  2799  	EmitLoop int64 "emitLoop" // Time within the emit/map loop, in nanoseconds
  2800  }
  2801  
  2802  // MapReduce executes a map/reduce job for documents covered by the query.
  2803  // That kind of job is suitable for very flexible bulk aggregation of data
  2804  // performed at the server side via Javascript functions.
  2805  //
  2806  // Results from the job may be returned as a result of the query itself
  2807  // through the result parameter when they are certain to fit in memory
  2808  // and in a single document.  If there's the possibility that the amount
  2809  // of data might be too large, results must be stored back in an alternative
  2810  // collection or even a separate database, by setting the Out field of the
  2811  // provided MapReduce job.  In that case, provide nil as the result parameter.
  2812  //
  2813  // These are some of the ways to set Out:
  2814  //
  2815  //     nil
  2816  //         Inline results into the result parameter.
  2817  //
  2818  //     bson.M{"replace": "mycollection"}
  2819  //         The output will be inserted into a collection which replaces any
  2820  //         existing collection with the same name.
  2821  //
  2822  //     bson.M{"merge": "mycollection"}
  2823  //         This option will merge new data into the old output collection. In
  2824  //         other words, if the same key exists in both the result set and the
  2825  //         old collection, the new key will overwrite the old one.
  2826  //
  2827  //     bson.M{"reduce": "mycollection"}
  2828  //         If documents exist for a given key in the result set and in the old
  2829  //         collection, then a reduce operation (using the specified reduce
  2830  //         function) will be performed on the two values and the result will be
  2831  //         written to the output collection. If a finalize function was
  2832  //         provided, this will be run after the reduce as well.
  2833  //
  2834  //     bson.M{...., "db": "mydb"}
  2835  //         Any of the above options can have the "db" key included for doing
  2836  //         the respective action in a separate database.
  2837  //
  2838  // The following is a trivial example which will count the number of
  2839  // occurrences of a field named n on each document in a collection, and
  2840  // will return results inline:
  2841  //
  2842  //     job := &mgo.MapReduce{
  2843  //             Map:      "function() { emit(this.n, 1) }",
  2844  //             Reduce:   "function(key, values) { return Array.sum(values) }",
  2845  //     }
  2846  //     var result []struct { Id int "_id"; Value int }
  2847  //     _, err := collection.Find(nil).MapReduce(job, &result)
  2848  //     if err != nil {
  2849  //         return err
  2850  //     }
  2851  //     for _, item := range result {
  2852  //         fmt.Println(item.Value)
  2853  //     }
  2854  //
  2855  // This function is compatible with MongoDB 1.7.4+.
  2856  //
  2857  // Relevant documentation:
  2858  //
  2859  //     http://www.mongodb.org/display/DOCS/MapReduce
  2860  //
  2861  func (q *Query) MapReduce(job *MapReduce, result interface{}) (info *MapReduceInfo, err error) {
  2862  	q.m.Lock()
  2863  	session := q.session
  2864  	op := q.op // Copy.
  2865  	limit := q.limit
  2866  	q.m.Unlock()
  2867  
  2868  	c := strings.Index(op.collection, ".")
  2869  	if c < 0 {
  2870  		return nil, errors.New("Bad collection name: " + op.collection)
  2871  	}
  2872  
  2873  	dbname := op.collection[:c]
  2874  	cname := op.collection[c+1:]
  2875  
  2876  	cmd := mapReduceCmd{
  2877  		Collection: cname,
  2878  		Map:        job.Map,
  2879  		Reduce:     job.Reduce,
  2880  		Finalize:   job.Finalize,
  2881  		Out:        fixMROut(job.Out),
  2882  		Scope:      job.Scope,
  2883  		Verbose:    job.Verbose,
  2884  		Query:      op.query,
  2885  		Sort:       op.options.OrderBy,
  2886  		Limit:      limit,
  2887  	}
  2888  
  2889  	if cmd.Out == nil {
  2890  		cmd.Out = bson.M{"inline": 1}
  2891  	}
  2892  
  2893  	var doc mapReduceResult
  2894  	err = session.DB(dbname).Run(&cmd, &doc)
  2895  	if err != nil {
  2896  		return nil, err
  2897  	}
  2898  	if doc.Err != "" {
  2899  		return nil, errors.New(doc.Err)
  2900  	}
  2901  
  2902  	info = &MapReduceInfo{
  2903  		InputCount:  doc.Counts.Input,
  2904  		EmitCount:   doc.Counts.Emit,
  2905  		OutputCount: doc.Counts.Output,
  2906  		Time:        doc.TimeMillis * 1e6,
  2907  	}
  2908  
  2909  	if doc.Result.Kind == 0x02 {
  2910  		err = doc.Result.Unmarshal(&info.Collection)
  2911  		info.Database = dbname
  2912  	} else if doc.Result.Kind == 0x03 {
  2913  		var v struct{ Collection, Db string }
  2914  		err = doc.Result.Unmarshal(&v)
  2915  		info.Collection = v.Collection
  2916  		info.Database = v.Db
  2917  	}
  2918  
  2919  	if doc.Timing != nil {
  2920  		info.VerboseTime = doc.Timing
  2921  		info.VerboseTime.Total *= 1e6
  2922  		info.VerboseTime.Map *= 1e6
  2923  		info.VerboseTime.EmitLoop *= 1e6
  2924  	}
  2925  
  2926  	if err != nil {
  2927  		return nil, err
  2928  	}
  2929  	if result != nil {
  2930  		return info, doc.Results.Unmarshal(result)
  2931  	}
  2932  	return info, nil
  2933  }
  2934  
  2935  // The "out" option in the MapReduce command must be ordered. This was
  2936  // discovered only after the implementation had long been accepting maps,
  2937  // so rather than breaking the API, we fix the order here when necessary.
  2938  // Details about the order requirement may be seen in MongoDB's code:
  2939  //
  2940  //     http://goo.gl/L8jwJX
  2941  //
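        // As an illustration, a caller passing the unordered form
        //
        //     bson.M{"db": "mydb", "replace": "mycollection"}
        //
        // is rewritten below into an ordered document with the output mode first:
        //
        //     bson.D{{"replace", "mycollection"}, {"db", "mydb"}}
        //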
  2942  func fixMROut(out interface{}) interface{} {
  2943  	outv := reflect.ValueOf(out)
  2944  	if outv.Kind() != reflect.Map || outv.Type().Key() != reflect.TypeOf("") {
  2945  		return out
  2946  	}
  2947  	outs := make(bson.D, outv.Len())
  2948  
  2949  	outTypeIndex := -1
  2950  	for i, k := range outv.MapKeys() {
  2951  		ks := k.String()
  2952  		outs[i].Name = ks
  2953  		outs[i].Value = outv.MapIndex(k).Interface()
  2954  		switch ks {
  2955  		case "normal", "replace", "merge", "reduce", "inline":
  2956  			outTypeIndex = i
  2957  		}
  2958  	}
  2959  	if outTypeIndex > 0 {
  2960  		outs[0], outs[outTypeIndex] = outs[outTypeIndex], outs[0]
  2961  	}
  2962  	return outs
  2963  }
  2964  
  2965  type Change struct {
  2966  	Update    interface{} // The change document
  2967  	Upsert    bool        // Whether to insert in case the document isn't found
  2968  	Remove    bool        // Whether to remove the document found rather than updating
  2969  	ReturnNew bool        // Should the modified document be returned rather than the old one
  2970  }
  2971  
  2972  type findModifyCmd struct {
  2973  	Collection                  string      "findAndModify"
  2974  	Query, Update, Sort, Fields interface{} ",omitempty"
  2975  	Upsert, Remove, New         bool        ",omitempty"
  2976  }
  2977  
  2978  type valueResult struct {
  2979  	Value     bson.Raw
  2980  	LastError LastError "lastErrorObject"
  2981  }
  2982  
  2983  // Apply allows updating, upserting or removing a document matching a query
  2984  // and atomically returning either the old version (the default) or the new
  2985  // version of the document (when ReturnNew is true). If no objects are
  2986  // found, Apply returns ErrNotFound.
  2987  //
  2988  // The Sort and Select query methods affect the result of Apply.  In case
  2989  // multiple documents match the query, Sort enables selecting which document to
  2990  // act upon by ordering it first.  Select enables retrieving only a selection
  2991  // of fields of the new or old document.
  2992  //
  2993  // This simple example increments a counter and prints its new value:
  2994  //
  2995  //     change := mgo.Change{
  2996  //             Update: bson.M{"$inc": bson.M{"n": 1}},
  2997  //             ReturnNew: true,
  2998  //     }
  2999  //     info, err = col.Find(bson.M{"_id": id}).Apply(change, &doc)
  3000  //     fmt.Println(doc.N)
  3001  //
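        // Combined with a Sort, Apply may also be used to atomically pop a
        // document off a queue-like collection, as in this sketch (the "state"
        // and "created" field names are only illustrative):
        //
        //     change := mgo.Change{Remove: true}
        //     _, err = col.Find(bson.M{"state": "pending"}).Sort("created").Apply(change, &doc)
        //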
  3002  // This method depends on MongoDB >= 2.0 to work properly.
  3003  //
  3004  // Relevant documentation:
  3005  //
  3006  //     http://www.mongodb.org/display/DOCS/findAndModify+Command
  3007  //     http://www.mongodb.org/display/DOCS/Updating
  3008  //     http://www.mongodb.org/display/DOCS/Atomic+Operations
  3009  //
  3010  func (q *Query) Apply(change Change, result interface{}) (info *ChangeInfo, err error) {
  3011  	q.m.Lock()
  3012  	session := q.session
  3013  	op := q.op // Copy.
  3014  	q.m.Unlock()
  3015  
  3016  	c := strings.Index(op.collection, ".")
  3017  	if c < 0 {
  3018  		return nil, errors.New("bad collection name: " + op.collection)
  3019  	}
  3020  
  3021  	dbname := op.collection[:c]
  3022  	cname := op.collection[c+1:]
  3023  
  3024  	cmd := findModifyCmd{
  3025  		Collection: cname,
  3026  		Update:     change.Update,
  3027  		Upsert:     change.Upsert,
  3028  		Remove:     change.Remove,
  3029  		New:        change.ReturnNew,
  3030  		Query:      op.query,
  3031  		Sort:       op.options.OrderBy,
  3032  		Fields:     op.selector,
  3033  	}
  3034  
  3035  	session = session.Clone()
  3036  	defer session.Close()
  3037  	session.SetMode(Strong, false)
  3038  
  3039  	var doc valueResult
  3040  	err = session.DB(dbname).Run(&cmd, &doc)
  3041  	if err != nil {
  3042  		if qerr, ok := err.(*QueryError); ok && qerr.Message == "No matching object found" {
  3043  			return nil, ErrNotFound
  3044  		}
  3045  		return nil, err
  3046  	}
  3047  	if doc.LastError.N == 0 {
  3048  		return nil, ErrNotFound
  3049  	}
  3050  	if doc.Value.Kind != 0x0A { // Not a BSON null, so a document was returned.
  3051  		err = doc.Value.Unmarshal(result)
  3052  		if err != nil {
  3053  			return nil, err
  3054  		}
  3055  	}
  3056  	info = &ChangeInfo{}
  3057  	lerr := &doc.LastError
  3058  	if lerr.UpdatedExisting {
  3059  		info.Updated = lerr.N
  3060  	} else if change.Remove {
  3061  		info.Removed = lerr.N
  3062  	} else if change.Upsert {
  3063  		info.UpsertedId = lerr.UpsertedId
  3064  	}
  3065  	return info, nil
  3066  }
  3067  
  3068  // The BuildInfo type encapsulates details about the running MongoDB server.
  3069  //
  3070  // Note that the VersionArray field was only introduced in MongoDB 2.0; for
  3071  // older servers it is assembled internally from the Version string.
  3072  // In both cases, VersionArray is guaranteed to have at least 4 entries.
  3073  type BuildInfo struct {
  3074  	Version       string
  3075  	VersionArray  []int  `bson:"versionArray"` // On MongoDB 2.0+; assembled from Version otherwise
  3076  	GitVersion    string `bson:"gitVersion"`
  3077  	SysInfo       string `bson:"sysInfo"`
  3078  	Bits          int
  3079  	Debug         bool
  3080  	MaxObjectSize int `bson:"maxBsonObjectSize"`
  3081  }
  3082  
  3083  // BuildInfo retrieves the version and other details about the
  3084  // running MongoDB server.
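        //
        // Since VersionArray is padded to at least 4 entries, it may be used for
        // simple version gating; as a sketch (the 2.4 cutoff is just an example):
        //
        //     info, err := session.BuildInfo()
        //     if err != nil {
        //         return err
        //     }
        //     v := info.VersionArray
        //     if v[0] > 2 || v[0] == 2 && v[1] >= 4 {
        //         // Server is 2.4 or newer.
        //     }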
  3085  func (s *Session) BuildInfo() (info BuildInfo, err error) {
  3086  	err = s.Run(bson.D{{"buildInfo", "1"}}, &info)
  3087  	if len(info.VersionArray) == 0 {
  3088  		for _, a := range strings.Split(info.Version, ".") {
  3089  			i, err := strconv.Atoi(a)
  3090  			if err != nil {
  3091  				break
  3092  			}
  3093  			info.VersionArray = append(info.VersionArray, i)
  3094  		}
  3095  	}
  3096  	for len(info.VersionArray) < 4 {
  3097  		info.VersionArray = append(info.VersionArray, 0)
  3098  	}
  3099  	return
  3100  }
  3101  
  3102  // ---------------------------------------------------------------------------
  3103  // Internal session handling helpers.
  3104  
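        // acquireSocket returns a socket reserved for use by this session,
        // acquiring a new one from the cluster and authenticating it if no
        // suitable socket is reserved yet. slaveOk tells whether a slave socket
        // is acceptable for the operation; the session's own slaveOk flag and
        // consistency mode are taken into account as well.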
  3105  func (s *Session) acquireSocket(slaveOk bool) (*mongoSocket, error) {
  3106  
  3107  	// Read-only lock to check for previously reserved socket.
  3108  	s.m.RLock()
  3109  	if s.masterSocket != nil {
  3110  		socket := s.masterSocket
  3111  		socket.Acquire()
  3112  		s.m.RUnlock()
  3113  		return socket, nil
  3114  	}
  3115  	if s.slaveSocket != nil && s.slaveOk && slaveOk {
  3116  		socket := s.slaveSocket
  3117  		socket.Acquire()
  3118  		s.m.RUnlock()
  3119  		return socket, nil
  3120  	}
  3121  	s.m.RUnlock()
  3122  
  3123  	// No go.  We may have to request a new socket and change the session,
  3124  	// so try again but with an exclusive lock now.
  3125  	s.m.Lock()
  3126  	defer s.m.Unlock()
  3127  
  3128  	if s.masterSocket != nil {
  3129  		s.masterSocket.Acquire()
  3130  		return s.masterSocket, nil
  3131  	}
  3132  	if s.slaveSocket != nil && s.slaveOk && slaveOk {
  3133  		s.slaveSocket.Acquire()
  3134  		return s.slaveSocket, nil
  3135  	}
  3136  
  3137  	// Still not good.  We need a new socket.
  3138  	sock, err := s.cluster().AcquireSocket(slaveOk && s.slaveOk, s.syncTimeout, s.sockTimeout, s.queryConfig.op.serverTags)
  3139  	if err != nil {
  3140  		return nil, err
  3141  	}
  3142  
  3143  	// Authenticate the new socket.
  3144  	if err = s.socketLogin(sock); err != nil {
  3145  		sock.Release()
  3146  		return nil, err
  3147  	}
  3148  
  3149  	// Keep track of the new socket, if necessary.
  3150  	// Note that, as a special case, if the Eventual session was
  3151  	// not refreshed (s.slaveSocket != nil), it means the developer
  3152  	// asked to preserve an existing reserved socket, so we'll
  3153  	// keep a master one around too before a Refresh happens.
  3154  	if s.consistency != Eventual || s.slaveSocket != nil {
  3155  		s.setSocket(sock)
  3156  	}
  3157  
  3158  	// Switch over a Monotonic session to the master.
  3159  	if !slaveOk && s.consistency == Monotonic {
  3160  		s.slaveOk = false
  3161  	}
  3162  
  3163  	return sock, nil
  3164  }
  3165  
  3166  // setSocket binds socket to this session.
  3167  func (s *Session) setSocket(socket *mongoSocket) {
  3168  	info := socket.Acquire()
  3169  	if info.Master {
  3170  		if s.masterSocket != nil {
  3171  			panic("setSocket(master) with existing master socket reserved")
  3172  		}
  3173  		s.masterSocket = socket
  3174  	} else {
  3175  		if s.slaveSocket != nil {
  3176  			panic("setSocket(slave) with existing slave socket reserved")
  3177  		}
  3178  		s.slaveSocket = socket
  3179  	}
  3180  }
  3181  
  3182  // unsetSocket releases any slave and/or master sockets reserved.
  3183  func (s *Session) unsetSocket() {
  3184  	if s.masterSocket != nil {
  3185  		s.masterSocket.Release()
  3186  	}
  3187  	if s.slaveSocket != nil {
  3188  		s.slaveSocket.Release()
  3189  	}
  3190  	s.masterSocket = nil
  3191  	s.slaveSocket = nil
  3192  }
  3193  
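        // replyFunc returns the callback that delivers server replies to this
        // iterator. It tracks how many documents are still expected, records
        // errors and cursor ids, queues received documents for consumption, and
        // wakes up any goroutine blocked waiting for a reply.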
  3194  func (iter *Iter) replyFunc() replyFunc {
  3195  	return func(err error, op *replyOp, docNum int, docData []byte) {
  3196  		iter.m.Lock()
  3197  		iter.docsToReceive--
  3198  		if err != nil {
  3199  			iter.err = err
  3200  			debugf("Iter %p received an error: %s", iter, err.Error())
  3201  		} else if docNum == -1 {
  3202  			debugf("Iter %p received no documents (cursor=%d).", iter, op.cursorId)
  3203  			if op != nil && op.cursorId != 0 {
  3204  				// It's a tailable cursor.
  3205  				iter.op.cursorId = op.cursorId
  3206  			} else {
  3207  				iter.err = ErrNotFound
  3208  			}
  3209  		} else {
  3210  			rdocs := int(op.replyDocs)
  3211  			if docNum == 0 {
  3212  				iter.docsToReceive += rdocs - 1
  3213  				docsToProcess := iter.docData.Len() + rdocs
  3214  				if iter.limit == 0 || int32(docsToProcess) < iter.limit {
  3215  					iter.docsBeforeMore = docsToProcess - int(iter.prefetch*float64(rdocs))
  3216  				} else {
  3217  					iter.docsBeforeMore = -1
  3218  				}
  3219  				iter.op.cursorId = op.cursorId
  3220  			}
  3221  			// XXX Handle errors and flags.
  3222  			debugf("Iter %p received reply document %d/%d (cursor=%d)", iter, docNum+1, rdocs, op.cursorId)
  3223  			iter.docData.Push(docData)
  3224  		}
  3225  		iter.gotReply.Broadcast()
  3226  		iter.m.Unlock()
  3227  	}
  3228  }
  3229  
  3230  // writeQuery runs the given modifying operation, potentially followed up
  3231  // by a getLastError command in case the session is in safe mode.  The
  3232  // LastError result is made available in lerr, and if lerr.Err is set it
  3233  // will also be returned as err.
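        //
        // Safe mode is what callers enable via Session.SetSafe; for instance
        // (the specific write concern below is only illustrative):
        //
        //     session.SetSafe(&mgo.Safe{W: 1, WTimeout: 500})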
  3234  func (c *Collection) writeQuery(op interface{}) (lerr *LastError, err error) {
  3235  	s := c.Database.Session
  3236  	dbname := c.Database.Name
  3237  	socket, err := s.acquireSocket(dbname == "local") // The node-local "local" database may be written via a slave.
  3238  	if err != nil {
  3239  		return nil, err
  3240  	}
  3241  	defer socket.Release()
  3242  
  3243  	s.m.RLock()
  3244  	safeOp := s.safeOp
  3245  	s.m.RUnlock()
  3246  
  3247  	if safeOp == nil {
  3248  		return nil, socket.Query(op)
  3249  	} else {
  3250  		var mutex sync.Mutex
  3251  		var replyData []byte
  3252  		var replyErr error
  3253  		mutex.Lock()
  3254  		query := *safeOp // Copy the data.
  3255  		query.collection = dbname + ".$cmd"
  3256  		query.replyFunc = func(err error, reply *replyOp, docNum int, docData []byte) {
  3257  			replyData = docData
  3258  			replyErr = err
  3259  			mutex.Unlock()
  3260  		}
  3261  		err = socket.Query(op, &query)
  3262  		if err != nil {
  3263  			return nil, err
  3264  		}
  3265  		mutex.Lock() // Wait.
  3266  		if replyErr != nil {
  3267  			return nil, replyErr // XXX TESTME
  3268  		}
  3269  		if hasErrMsg(replyData) {
  3270  			// Looks like getLastError itself failed.
  3271  			err = checkQueryError(query.collection, replyData)
  3272  			if err != nil {
  3273  				return nil, err
  3274  			}
  3275  		}
  3276  		result := &LastError{}
  3277  		bson.Unmarshal(replyData, &result)
  3278  		debugf("Result from writing query: %#v", result)
  3279  		if result.Err != "" {
  3280  			return result, result
  3281  		}
  3282  		return result, nil
  3283  	}
  3285  }
  3286  
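        // hasErrMsg reports whether the raw BSON document in d contains a string
        // element (type byte 0x02) named "errmsg", which the server sets when a
        // command fails.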
  3287  func hasErrMsg(d []byte) bool {
  3288  	l := len(d)
  3289  	for i := 0; i+8 < l; i++ {
  3290  		if d[i] == '\x02' && d[i+1] == 'e' && d[i+2] == 'r' && d[i+3] == 'r' && d[i+4] == 'm' && d[i+5] == 's' && d[i+6] == 'g' && d[i+7] == '\x00' {
  3291  			return true
  3292  		}
  3293  	}
  3294  	return false
  3295  }