launchpad.net/~rogpeppe/juju-core/500-errgo-fix@v0.0.0-20140213181702-000000002356/state/open.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package state
     5  
     6  import (
     7  	"crypto/tls"
     8  	"crypto/x509"
     9  	"fmt"
    10  	errgo "launchpad.net/errgo/errors"
    11  	"net"
    12  	"time"
    13  
    14  	"labix.org/v2/mgo"
    15  	"labix.org/v2/mgo/txn"
    16  
    17  	"launchpad.net/juju-core/cert"
    18  	"launchpad.net/juju-core/constraints"
    19  	"launchpad.net/juju-core/environs/config"
    20  	"launchpad.net/juju-core/errors"
    21  	"launchpad.net/juju-core/state/presence"
    22  	"launchpad.net/juju-core/state/watcher"
    23  	"launchpad.net/juju-core/utils"
    24  )
    25  
    26  // mongoSocketTimeout should be long enough that
    27  // even a slow mongo server will respond in that
    28  // length of time. Since mongo servers ping themselves
    29  // every 10 seconds, that seems like a reasonable
    30  // default.
    31  const mongoSocketTimeout = 10 * time.Second
    32  
// Info encapsulates information about the cluster of
// servers holding juju state, and can be used to make a
// connection to that cluster.
type Info struct {
	// Addrs gives the addresses of the MongoDB servers for the state.
	// Each address should be in the form address:port.
	// At least one address is required to open a connection.
	Addrs []string

	// CACert holds the CA certificate that will be used
	// to validate the state server's certificate, in PEM format.
	// It must not be empty when opening a connection.
	CACert []byte

	// Tag holds the name of the entity that is connecting.
	// It should be empty when connecting as an administrator.
	Tag string

	// Password holds the password for the connecting entity.
	// When Tag is empty and Password is not, it is used to
	// log in to the admin database as the "admin" user.
	Password string
}
    52  
    53  // DialOpts holds configuration parameters that control the
    54  // Dialing behavior when connecting to a state server.
    55  type DialOpts struct {
    56  	// Timeout is the amount of time to wait contacting
    57  	// a state server.
    58  	Timeout time.Duration
    59  }
    60  
    61  // DefaultDialOpts returns a DialOpts representing the default
    62  // parameters for contacting a state server.
    63  func DefaultDialOpts() DialOpts {
    64  	return DialOpts{
    65  		Timeout: 10 * time.Minute,
    66  	}
    67  }
    68  
    69  // Open connects to the server described by the given
    70  // info, waits for it to be initialized, and returns a new State
    71  // representing the environment connected to.
    72  // It returns unauthorizedError if access is unauthorized.
    73  func Open(info *Info, opts DialOpts) (*State, error) {
    74  	logger.Infof("opening state; mongo addresses: %q; entity %q", info.Addrs, info.Tag)
    75  	if len(info.Addrs) == 0 {
    76  		return nil, errgo.New("no mongo addresses")
    77  	}
    78  	if len(info.CACert) == 0 {
    79  		return nil, errgo.New("missing CA certificate")
    80  	}
    81  	xcert, err := cert.ParseCert(info.CACert)
    82  	if err != nil {
    83  		return nil, errgo.Notef(err, "cannot parse CA certificate")
    84  	}
    85  	pool := x509.NewCertPool()
    86  	pool.AddCert(xcert)
    87  	tlsConfig := &tls.Config{
    88  		RootCAs:    pool,
    89  		ServerName: "anything",
    90  	}
    91  	dial := func(addr net.Addr) (net.Conn, error) {
    92  		c, err := net.Dial("tcp", addr.String())
    93  		if err != nil {
    94  			logger.Debugf("connection failed, will retry: %v", err)
    95  			return nil, err
    96  		}
    97  		cc := tls.Client(c, tlsConfig)
    98  		if err := cc.Handshake(); err != nil {
    99  			logger.Errorf("TLS handshake failed: %v", err)
   100  			return nil, err
   101  		}
   102  		return cc, nil
   103  	}
   104  	session, err := mgo.DialWithInfo(&mgo.DialInfo{
   105  		Addrs:   info.Addrs,
   106  		Timeout: opts.Timeout,
   107  		Dial:    dial,
   108  	})
   109  	if err != nil {
   110  		return nil, mask(err)
   111  	}
   112  	logger.Infof("connection established")
   113  	st, err := newState(session, info)
   114  	if err != nil {
   115  		session.Close()
   116  		return nil, err
   117  	}
   118  	session.SetSocketTimeout(mongoSocketTimeout)
   119  	return st, nil
   120  }
   121  
   122  // Initialize sets up an initial empty state and returns it.
   123  // This needs to be performed only once for a given environment.
   124  // It returns unauthorizedError if access is unauthorized.
   125  func Initialize(info *Info, cfg *config.Config, opts DialOpts) (rst *State, err error) {
   126  	st, err := Open(info, opts)
   127  	if err != nil {
   128  		return nil, mask(err)
   129  	}
   130  	defer func() {
   131  		if err != nil {
   132  			st.Close()
   133  		}
   134  	}()
   135  	// A valid environment is used as a signal that the
   136  	// state has already been initalized. If this is the case
   137  	// do nothing.
   138  	if _, err := st.Environment(); err == nil {
   139  		return st, nil
   140  	} else if !errors.IsNotFoundError(err) {
   141  		return nil, err
   142  	}
   143  	logger.Infof("initializing environment")
   144  	if err := checkEnvironConfig(cfg); err != nil {
   145  		return nil, mask(err)
   146  	}
   147  	uuid, err := utils.NewUUID()
   148  	if err != nil {
   149  		return nil, errgo.Notef(err, "environment UUID cannot be created")
   150  	}
   151  	ops := []txn.Op{
   152  		createConstraintsOp(st, environGlobalKey, constraints.Value{}),
   153  		createSettingsOp(st, environGlobalKey, cfg.AllAttrs()),
   154  		createEnvironmentOp(st, cfg.Name(), uuid.String()),
   155  		{
   156  			C:      st.stateServers.Name,
   157  			Id:     "",
   158  			Insert: &stateServersDoc{},
   159  		},
   160  	}
   161  	if err := st.runTransaction(ops); errgo.Cause(err) == txn.ErrAborted {
   162  		// The config was created in the meantime.
   163  		return st, nil
   164  	} else if err != nil {
   165  		return nil, mask(err)
   166  	}
   167  	return st, nil
   168  }
   169  
   170  var indexes = []struct {
   171  	collection string
   172  	key        []string
   173  }{
   174  	// After the first public release, do not remove entries from here
   175  	// without adding them to a list of indexes to drop, to ensure
   176  	// old databases are modified to have the correct indexes.
   177  	{"relations", []string{"endpoints.relationname"}},
   178  	{"relations", []string{"endpoints.servicename"}},
   179  	{"units", []string{"service"}},
   180  	{"units", []string{"principal"}},
   181  	{"units", []string{"machineid"}},
   182  	{"users", []string{"name"}},
   183  }
   184  
   185  // The capped collection used for transaction logs defaults to 10MB.
   186  // It's tweaked in export_test.go to 1MB to avoid the overhead of
   187  // creating and deleting the large file repeatedly in tests.
   188  var (
   189  	logSize      = 10000000
   190  	logSizeTests = 1000000
   191  )
   192  
   193  func maybeUnauthorized(err error, msg string) error {
   194  	if err == nil {
   195  		return nil
   196  	}
   197  	if isUnauthorized(err) {
   198  		return errors.Unauthorizedf("%s: unauthorized mongo access: %v", msg, err)
   199  	}
   200  	return errgo.Notef(err, "%s", msg)
   201  }
   202  
   203  func isUnauthorized(err error) bool {
   204  	if err == nil {
   205  		return false
   206  	}
   207  	// Some unauthorized access errors have no error code,
   208  	// just a simple error string.
   209  	if err.Error() == "auth fails" {
   210  		return true
   211  	}
   212  	if err, ok := err.(*mgo.QueryError); ok {
   213  		return err.Code == 10057 ||
   214  			err.Message == "need to login" ||
   215  			err.Message == "unauthorized"
   216  	}
   217  	return false
   218  }
   219  
   220  func newState(session *mgo.Session, info *Info) (*State, error) {
   221  	db := session.DB("juju")
   222  	pdb := session.DB("presence")
   223  	if info.Tag != "" {
   224  		if err := db.Login(info.Tag, info.Password); err != nil {
   225  			return nil, maybeUnauthorized(err, fmt.Sprintf("cannot log in to juju database as %q", info.Tag))
   226  		}
   227  		if err := pdb.Login(info.Tag, info.Password); err != nil {
   228  			return nil, maybeUnauthorized(err, fmt.Sprintf("cannot log in to presence database as %q", info.Tag))
   229  		}
   230  	} else if info.Password != "" {
   231  		admin := session.DB("admin")
   232  		if err := admin.Login("admin", info.Password); err != nil {
   233  			return nil, maybeUnauthorized(err, "cannot log in to admin database")
   234  		}
   235  	}
   236  	st := &State{
   237  		info:           info,
   238  		db:             db,
   239  		environments:   db.C("environments"),
   240  		charms:         db.C("charms"),
   241  		machines:       db.C("machines"),
   242  		containerRefs:  db.C("containerRefs"),
   243  		instanceData:   db.C("instanceData"),
   244  		relations:      db.C("relations"),
   245  		relationScopes: db.C("relationscopes"),
   246  		services:       db.C("services"),
   247  		minUnits:       db.C("minunits"),
   248  		settings:       db.C("settings"),
   249  		settingsrefs:   db.C("settingsrefs"),
   250  		constraints:    db.C("constraints"),
   251  		units:          db.C("units"),
   252  		users:          db.C("users"),
   253  		presence:       pdb.C("presence"),
   254  		cleanups:       db.C("cleanups"),
   255  		annotations:    db.C("annotations"),
   256  		statuses:       db.C("statuses"),
   257  		stateServers:   db.C("stateServers"),
   258  	}
   259  	log := db.C("txns.log")
   260  	logInfo := mgo.CollectionInfo{Capped: true, MaxBytes: logSize}
   261  	// The lack of error code for this error was reported upstream:
   262  	//     https://jira.klmongodb.org/browse/SERVER-6992
   263  	err := log.Create(&logInfo)
   264  	if err != nil && err.Error() != "collection already exists" {
   265  		return nil, maybeUnauthorized(err, "cannot create log collection")
   266  	}
   267  	st.runner = txn.NewRunner(db.C("txns"))
   268  	st.runner.ChangeLog(db.C("txns.log"))
   269  	st.watcher = watcher.New(db.C("txns.log"))
   270  	st.pwatcher = presence.NewWatcher(pdb.C("presence"))
   271  	for _, item := range indexes {
   272  		index := mgo.Index{Key: item.key}
   273  		if err := db.C(item.collection).EnsureIndex(index); err != nil {
   274  			return nil, errgo.Notef(err, "cannot create database index")
   275  		}
   276  	}
   277  	st.transactionHooks = make(chan ([]transactionHook), 1)
   278  	st.transactionHooks <- nil
   279  
   280  	// TODO(rog) delete this when we can assume there are no
   281  	// pre-1.18 environments running.
   282  	if err := st.createStateServersDoc(); err != nil {
   283  		return nil, errgo.Notef(err, "cannot create state servers document")
   284  	}
   285  	return st, nil
   286  }
   287  
   288  // createStateServersDoc creates the state servers document
   289  // if it does not already exist. This is necessary to cope with
   290  // legacy environments that have not created the document
   291  // at initialization time.
   292  func (st *State) createStateServersDoc() error {
   293  	// Quick check to see if we need to do anything so
   294  	// that we can avoid transaction overhead in most cases.
   295  	// We don't care what the error is - if it's something
   296  	// unexpected, it'll be picked up again below.
   297  	if info, err := st.StateServerInfo(); err == nil {
   298  		if len(info.MachineIds) > 0 && len(info.VotingMachineIds) > 0 {
   299  			return nil
   300  		}
   301  	}
   302  	logger.Infof("adding state server info to legacy environment")
   303  	// Find all current state servers and add the state servers
   304  	// record containing them. We don't need to worry about
   305  	// this being concurrent-safe, because in the juju versions
   306  	// we're concerned about, there is only ever one state connection
   307  	// (from the single bootstrap machine).
   308  	var machineDocs []machineDoc
   309  	err := st.machines.Find(D{{"jobs", JobManageEnviron}}).All(&machineDocs)
   310  	if err != nil {
   311  		return mask(err)
   312  	}
   313  	var doc stateServersDoc
   314  	for _, m := range machineDocs {
   315  		doc.MachineIds = append(doc.MachineIds, m.Id)
   316  	}
   317  	doc.VotingMachineIds = doc.MachineIds
   318  	logger.Infof("found existing state servers %v", doc.MachineIds)
   319  
   320  	// We update the document before inserting it because
   321  	// an earlier version of this code did not insert voting machine
   322  	// ids or maintain the ids correctly. If that was the case,
   323  	// the insert will be a no-op.
   324  	ops := []txn.Op{{
   325  		C:  st.stateServers.Name,
   326  		Id: environGlobalKey,
   327  		Update: D{{"$set", D{
   328  			{"machineids", doc.MachineIds},
   329  			{"votingmachineids", doc.VotingMachineIds},
   330  		}}},
   331  	}, {
   332  		C:      st.stateServers.Name,
   333  		Id:     environGlobalKey,
   334  		Insert: &doc,
   335  	}}
   336  
   337  	return st.runTransaction(ops)
   338  }
   339  
   340  // CACert returns the certificate used to validate the state connection.
   341  func (st *State) CACert() (cert []byte) {
   342  	return append(cert, st.info.CACert...)
   343  }
   344  
   345  func (st *State) Close() error {
   346  	err1 := st.watcher.Stop()
   347  	err2 := st.pwatcher.Stop()
   348  	st.mu.Lock()
   349  	var err3 error
   350  	if st.allManager != nil {
   351  		err3 = st.allManager.Stop()
   352  	}
   353  	st.mu.Unlock()
   354  	st.db.Session.Close()
   355  	for _, err := range []error{err1, err2, err3} {
   356  		if err != nil {
   357  			return mask(err)
   358  		}
   359  	}
   360  	return nil
   361  }