launchpad.net/~rogpeppe/juju-core/500-errgo-fix@v0.0.0-20140213181702-000000002356/state/open.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package state 5 6 import ( 7 "crypto/tls" 8 "crypto/x509" 9 "fmt" 10 errgo "launchpad.net/errgo/errors" 11 "net" 12 "time" 13 14 "labix.org/v2/mgo" 15 "labix.org/v2/mgo/txn" 16 17 "launchpad.net/juju-core/cert" 18 "launchpad.net/juju-core/constraints" 19 "launchpad.net/juju-core/environs/config" 20 "launchpad.net/juju-core/errors" 21 "launchpad.net/juju-core/state/presence" 22 "launchpad.net/juju-core/state/watcher" 23 "launchpad.net/juju-core/utils" 24 ) 25 26 // mongoSocketTimeout should be long enough that 27 // even a slow mongo server will respond in that 28 // length of time. Since mongo servers ping themselves 29 // every 10 seconds, that seems like a reasonable 30 // default. 31 const mongoSocketTimeout = 10 * time.Second 32 33 // Info encapsulates information about cluster of 34 // servers holding juju state and can be used to make a 35 // connection to that cluster. 36 type Info struct { 37 // Addrs gives the addresses of the MongoDB servers for the state. 38 // Each address should be in the form address:port. 39 Addrs []string 40 41 // CACert holds the CA certificate that will be used 42 // to validate the state server's certificate, in PEM format. 43 CACert []byte 44 45 // Tag holds the name of the entity that is connecting. 46 // It should be empty when connecting as an administrator. 47 Tag string 48 49 // Password holds the password for the connecting entity. 50 Password string 51 } 52 53 // DialOpts holds configuration parameters that control the 54 // Dialing behavior when connecting to a state server. 55 type DialOpts struct { 56 // Timeout is the amount of time to wait contacting 57 // a state server. 58 Timeout time.Duration 59 } 60 61 // DefaultDialOpts returns a DialOpts representing the default 62 // parameters for contacting a state server. 63 func DefaultDialOpts() DialOpts { 64 return DialOpts{ 65 Timeout: 10 * time.Minute, 66 } 67 } 68 69 // Open connects to the server described by the given 70 // info, waits for it to be initialized, and returns a new State 71 // representing the environment connected to. 72 // It returns unauthorizedError if access is unauthorized. 73 func Open(info *Info, opts DialOpts) (*State, error) { 74 logger.Infof("opening state; mongo addresses: %q; entity %q", info.Addrs, info.Tag) 75 if len(info.Addrs) == 0 { 76 return nil, errgo.New("no mongo addresses") 77 } 78 if len(info.CACert) == 0 { 79 return nil, errgo.New("missing CA certificate") 80 } 81 xcert, err := cert.ParseCert(info.CACert) 82 if err != nil { 83 return nil, errgo.Notef(err, "cannot parse CA certificate") 84 } 85 pool := x509.NewCertPool() 86 pool.AddCert(xcert) 87 tlsConfig := &tls.Config{ 88 RootCAs: pool, 89 ServerName: "anything", 90 } 91 dial := func(addr net.Addr) (net.Conn, error) { 92 c, err := net.Dial("tcp", addr.String()) 93 if err != nil { 94 logger.Debugf("connection failed, will retry: %v", err) 95 return nil, err 96 } 97 cc := tls.Client(c, tlsConfig) 98 if err := cc.Handshake(); err != nil { 99 logger.Errorf("TLS handshake failed: %v", err) 100 return nil, err 101 } 102 return cc, nil 103 } 104 session, err := mgo.DialWithInfo(&mgo.DialInfo{ 105 Addrs: info.Addrs, 106 Timeout: opts.Timeout, 107 Dial: dial, 108 }) 109 if err != nil { 110 return nil, mask(err) 111 } 112 logger.Infof("connection established") 113 st, err := newState(session, info) 114 if err != nil { 115 session.Close() 116 return nil, err 117 } 118 session.SetSocketTimeout(mongoSocketTimeout) 119 return st, nil 120 } 121 122 // Initialize sets up an initial empty state and returns it. 123 // This needs to be performed only once for a given environment. 124 // It returns unauthorizedError if access is unauthorized. 125 func Initialize(info *Info, cfg *config.Config, opts DialOpts) (rst *State, err error) { 126 st, err := Open(info, opts) 127 if err != nil { 128 return nil, mask(err) 129 } 130 defer func() { 131 if err != nil { 132 st.Close() 133 } 134 }() 135 // A valid environment is used as a signal that the 136 // state has already been initalized. If this is the case 137 // do nothing. 138 if _, err := st.Environment(); err == nil { 139 return st, nil 140 } else if !errors.IsNotFoundError(err) { 141 return nil, err 142 } 143 logger.Infof("initializing environment") 144 if err := checkEnvironConfig(cfg); err != nil { 145 return nil, mask(err) 146 } 147 uuid, err := utils.NewUUID() 148 if err != nil { 149 return nil, errgo.Notef(err, "environment UUID cannot be created") 150 } 151 ops := []txn.Op{ 152 createConstraintsOp(st, environGlobalKey, constraints.Value{}), 153 createSettingsOp(st, environGlobalKey, cfg.AllAttrs()), 154 createEnvironmentOp(st, cfg.Name(), uuid.String()), 155 { 156 C: st.stateServers.Name, 157 Id: "", 158 Insert: &stateServersDoc{}, 159 }, 160 } 161 if err := st.runTransaction(ops); errgo.Cause(err) == txn.ErrAborted { 162 // The config was created in the meantime. 163 return st, nil 164 } else if err != nil { 165 return nil, mask(err) 166 } 167 return st, nil 168 } 169 170 var indexes = []struct { 171 collection string 172 key []string 173 }{ 174 // After the first public release, do not remove entries from here 175 // without adding them to a list of indexes to drop, to ensure 176 // old databases are modified to have the correct indexes. 177 {"relations", []string{"endpoints.relationname"}}, 178 {"relations", []string{"endpoints.servicename"}}, 179 {"units", []string{"service"}}, 180 {"units", []string{"principal"}}, 181 {"units", []string{"machineid"}}, 182 {"users", []string{"name"}}, 183 } 184 185 // The capped collection used for transaction logs defaults to 10MB. 186 // It's tweaked in export_test.go to 1MB to avoid the overhead of 187 // creating and deleting the large file repeatedly in tests. 188 var ( 189 logSize = 10000000 190 logSizeTests = 1000000 191 ) 192 193 func maybeUnauthorized(err error, msg string) error { 194 if err == nil { 195 return nil 196 } 197 if isUnauthorized(err) { 198 return errors.Unauthorizedf("%s: unauthorized mongo access: %v", msg, err) 199 } 200 return errgo.Notef(err, "%s", msg) 201 } 202 203 func isUnauthorized(err error) bool { 204 if err == nil { 205 return false 206 } 207 // Some unauthorized access errors have no error code, 208 // just a simple error string. 209 if err.Error() == "auth fails" { 210 return true 211 } 212 if err, ok := err.(*mgo.QueryError); ok { 213 return err.Code == 10057 || 214 err.Message == "need to login" || 215 err.Message == "unauthorized" 216 } 217 return false 218 } 219 220 func newState(session *mgo.Session, info *Info) (*State, error) { 221 db := session.DB("juju") 222 pdb := session.DB("presence") 223 if info.Tag != "" { 224 if err := db.Login(info.Tag, info.Password); err != nil { 225 return nil, maybeUnauthorized(err, fmt.Sprintf("cannot log in to juju database as %q", info.Tag)) 226 } 227 if err := pdb.Login(info.Tag, info.Password); err != nil { 228 return nil, maybeUnauthorized(err, fmt.Sprintf("cannot log in to presence database as %q", info.Tag)) 229 } 230 } else if info.Password != "" { 231 admin := session.DB("admin") 232 if err := admin.Login("admin", info.Password); err != nil { 233 return nil, maybeUnauthorized(err, "cannot log in to admin database") 234 } 235 } 236 st := &State{ 237 info: info, 238 db: db, 239 environments: db.C("environments"), 240 charms: db.C("charms"), 241 machines: db.C("machines"), 242 containerRefs: db.C("containerRefs"), 243 instanceData: db.C("instanceData"), 244 relations: db.C("relations"), 245 relationScopes: db.C("relationscopes"), 246 services: db.C("services"), 247 minUnits: db.C("minunits"), 248 settings: db.C("settings"), 249 settingsrefs: db.C("settingsrefs"), 250 constraints: db.C("constraints"), 251 units: db.C("units"), 252 users: db.C("users"), 253 presence: pdb.C("presence"), 254 cleanups: db.C("cleanups"), 255 annotations: db.C("annotations"), 256 statuses: db.C("statuses"), 257 stateServers: db.C("stateServers"), 258 } 259 log := db.C("txns.log") 260 logInfo := mgo.CollectionInfo{Capped: true, MaxBytes: logSize} 261 // The lack of error code for this error was reported upstream: 262 // https://jira.klmongodb.org/browse/SERVER-6992 263 err := log.Create(&logInfo) 264 if err != nil && err.Error() != "collection already exists" { 265 return nil, maybeUnauthorized(err, "cannot create log collection") 266 } 267 st.runner = txn.NewRunner(db.C("txns")) 268 st.runner.ChangeLog(db.C("txns.log")) 269 st.watcher = watcher.New(db.C("txns.log")) 270 st.pwatcher = presence.NewWatcher(pdb.C("presence")) 271 for _, item := range indexes { 272 index := mgo.Index{Key: item.key} 273 if err := db.C(item.collection).EnsureIndex(index); err != nil { 274 return nil, errgo.Notef(err, "cannot create database index") 275 } 276 } 277 st.transactionHooks = make(chan ([]transactionHook), 1) 278 st.transactionHooks <- nil 279 280 // TODO(rog) delete this when we can assume there are no 281 // pre-1.18 environments running. 282 if err := st.createStateServersDoc(); err != nil { 283 return nil, errgo.Notef(err, "cannot create state servers document") 284 } 285 return st, nil 286 } 287 288 // createStateServersDoc creates the state servers document 289 // if it does not already exist. This is necessary to cope with 290 // legacy environments that have not created the document 291 // at initialization time. 292 func (st *State) createStateServersDoc() error { 293 // Quick check to see if we need to do anything so 294 // that we can avoid transaction overhead in most cases. 295 // We don't care what the error is - if it's something 296 // unexpected, it'll be picked up again below. 297 if info, err := st.StateServerInfo(); err == nil { 298 if len(info.MachineIds) > 0 && len(info.VotingMachineIds) > 0 { 299 return nil 300 } 301 } 302 logger.Infof("adding state server info to legacy environment") 303 // Find all current state servers and add the state servers 304 // record containing them. We don't need to worry about 305 // this being concurrent-safe, because in the juju versions 306 // we're concerned about, there is only ever one state connection 307 // (from the single bootstrap machine). 308 var machineDocs []machineDoc 309 err := st.machines.Find(D{{"jobs", JobManageEnviron}}).All(&machineDocs) 310 if err != nil { 311 return mask(err) 312 } 313 var doc stateServersDoc 314 for _, m := range machineDocs { 315 doc.MachineIds = append(doc.MachineIds, m.Id) 316 } 317 doc.VotingMachineIds = doc.MachineIds 318 logger.Infof("found existing state servers %v", doc.MachineIds) 319 320 // We update the document before inserting it because 321 // an earlier version of this code did not insert voting machine 322 // ids or maintain the ids correctly. If that was the case, 323 // the insert will be a no-op. 324 ops := []txn.Op{{ 325 C: st.stateServers.Name, 326 Id: environGlobalKey, 327 Update: D{{"$set", D{ 328 {"machineids", doc.MachineIds}, 329 {"votingmachineids", doc.VotingMachineIds}, 330 }}}, 331 }, { 332 C: st.stateServers.Name, 333 Id: environGlobalKey, 334 Insert: &doc, 335 }} 336 337 return st.runTransaction(ops) 338 } 339 340 // CACert returns the certificate used to validate the state connection. 341 func (st *State) CACert() (cert []byte) { 342 return append(cert, st.info.CACert...) 343 } 344 345 func (st *State) Close() error { 346 err1 := st.watcher.Stop() 347 err2 := st.pwatcher.Stop() 348 st.mu.Lock() 349 var err3 error 350 if st.allManager != nil { 351 err3 = st.allManager.Stop() 352 } 353 st.mu.Unlock() 354 st.db.Session.Close() 355 for _, err := range []error{err1, err2, err3} { 356 if err != nil { 357 return mask(err) 358 } 359 } 360 return nil 361 }