github.com/manicqin/nomad@v0.9.5/client/state/state_database.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "time" 8 9 "github.com/boltdb/bolt" 10 11 hclog "github.com/hashicorp/go-hclog" 12 trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state" 13 dmstate "github.com/hashicorp/nomad/client/devicemanager/state" 14 driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state" 15 "github.com/hashicorp/nomad/helper/boltdd" 16 "github.com/hashicorp/nomad/nomad/structs" 17 ) 18 19 /* 20 The client has a boltDB backed state store. The schema as of 0.9 looks as follows: 21 22 meta/ 23 |--> version -> '2' (not msgpack encoded) 24 |--> upgraded -> time.Now().Format(timeRFC3339) 25 allocations/ 26 |--> <alloc-id>/ 27 |--> alloc -> allocEntry{*structs.Allocation} 28 |--> deploy_status -> deployStatusEntry{*structs.AllocDeploymentStatus} 29 |--> task-<name>/ 30 |--> local_state -> *trstate.LocalState # Local-only state 31 |--> task_state -> *structs.TaskState # Sync'd to servers 32 33 devicemanager/ 34 |--> plugin_state -> *dmstate.PluginState 35 36 drivermanager/ 37 |--> plugin_state -> *dmstate.PluginState 38 */ 39 40 var ( 41 // metaBucketName is the name of the metadata bucket 42 metaBucketName = []byte("meta") 43 44 // metaVersionKey is the key the state schema version is stored under. 45 metaVersionKey = []byte("version") 46 47 // metaVersion is the value of the state schema version to detect when 48 // an upgrade is needed. It skips the usual boltdd/msgpack backend to 49 // be as portable and futureproof as possible. 50 metaVersion = []byte{'2'} 51 52 // metaUpgradedKey is the key that stores the timestamp of the last 53 // time the schema was upgraded. 54 metaUpgradedKey = []byte("upgraded") 55 56 // allocationsBucketName is the bucket name containing all allocation related 57 // data 58 allocationsBucketName = []byte("allocations") 59 60 // allocKey is the key Allocations are stored under encapsulated in 61 // allocEntry structs. 62 allocKey = []byte("alloc") 63 64 // allocDeployStatusKey is the key *structs.AllocDeploymentStatus is 65 // stored under. 66 allocDeployStatusKey = []byte("deploy_status") 67 68 // allocations -> $allocid -> task-$taskname -> the keys below 69 taskLocalStateKey = []byte("local_state") 70 taskStateKey = []byte("task_state") 71 72 // devManagerBucket is the bucket name containing all device manager related 73 // data 74 devManagerBucket = []byte("devicemanager") 75 76 // driverManagerBucket is the bucket name container all driver manager 77 // related data 78 driverManagerBucket = []byte("drivermanager") 79 80 // managerPluginStateKey is the key by which plugin manager plugin state is 81 // stored at 82 managerPluginStateKey = []byte("plugin_state") 83 ) 84 85 // taskBucketName returns the bucket name for the given task name. 86 func taskBucketName(taskName string) []byte { 87 return []byte("task-" + taskName) 88 } 89 90 // NewStateDBFunc creates a StateDB given a state directory. 91 type NewStateDBFunc func(logger hclog.Logger, stateDir string) (StateDB, error) 92 93 // GetStateDBFactory returns a func for creating a StateDB 94 func GetStateDBFactory(devMode bool) NewStateDBFunc { 95 // Return a noop state db implementation when in debug mode 96 if devMode { 97 return func(hclog.Logger, string) (StateDB, error) { 98 return NoopDB{}, nil 99 } 100 } 101 102 return NewBoltStateDB 103 } 104 105 // BoltStateDB persists and restores Nomad client state in a boltdb. All 106 // methods are safe for concurrent access. 107 type BoltStateDB struct { 108 stateDir string 109 db *boltdd.DB 110 logger hclog.Logger 111 } 112 113 // NewBoltStateDB creates or opens an existing boltdb state file or returns an 114 // error. 115 func NewBoltStateDB(logger hclog.Logger, stateDir string) (StateDB, error) { 116 fn := filepath.Join(stateDir, "state.db") 117 118 // Check to see if the DB already exists 119 fi, err := os.Stat(fn) 120 if err != nil && !os.IsNotExist(err) { 121 return nil, err 122 } 123 firstRun := fi == nil 124 125 // Timeout to force failure when accessing a data dir that is already in use 126 timeout := &bolt.Options{Timeout: 5 * time.Second} 127 128 // Create or open the boltdb state database 129 db, err := boltdd.Open(fn, 0600, timeout) 130 if err == bolt.ErrTimeout { 131 return nil, fmt.Errorf("timed out while opening database, is another Nomad process accessing data_dir %s?", stateDir) 132 } else if err != nil { 133 return nil, fmt.Errorf("failed to create state database: %v", err) 134 } 135 136 sdb := &BoltStateDB{ 137 stateDir: stateDir, 138 db: db, 139 logger: logger, 140 } 141 142 // If db did not already exist, initialize metadata fields 143 if firstRun { 144 if err := sdb.init(); err != nil { 145 return nil, err 146 } 147 } 148 149 return sdb, nil 150 } 151 152 func (s *BoltStateDB) Name() string { 153 return "boltdb" 154 } 155 156 // GetAllAllocations gets all allocations persisted by this client and returns 157 // a map of alloc ids to errors for any allocations that could not be restored. 158 // 159 // If a fatal error was encountered it will be returned and the other two 160 // values will be nil. 161 func (s *BoltStateDB) GetAllAllocations() ([]*structs.Allocation, map[string]error, error) { 162 var allocs []*structs.Allocation 163 var errs map[string]error 164 err := s.db.View(func(tx *boltdd.Tx) error { 165 allocs, errs = s.getAllAllocations(tx) 166 return nil 167 }) 168 169 // db.View itself may return an error, so still check 170 if err != nil { 171 return nil, nil, err 172 } 173 174 return allocs, errs, nil 175 } 176 177 // allocEntry wraps values in the Allocations buckets 178 type allocEntry struct { 179 Alloc *structs.Allocation 180 } 181 182 func (s *BoltStateDB) getAllAllocations(tx *boltdd.Tx) ([]*structs.Allocation, map[string]error) { 183 allocs := []*structs.Allocation{} 184 errs := map[string]error{} 185 186 allocationsBkt := tx.Bucket(allocationsBucketName) 187 if allocationsBkt == nil { 188 // No allocs 189 return allocs, errs 190 } 191 192 // Create a cursor for iteration. 193 c := allocationsBkt.BoltBucket().Cursor() 194 195 // Iterate over all the allocation buckets 196 for k, _ := c.First(); k != nil; k, _ = c.Next() { 197 allocID := string(k) 198 allocBkt := allocationsBkt.Bucket(k) 199 if allocBkt == nil { 200 errs[allocID] = fmt.Errorf("missing alloc bucket") 201 continue 202 } 203 204 var ae allocEntry 205 if err := allocBkt.Get(allocKey, &ae); err != nil { 206 errs[allocID] = fmt.Errorf("failed to decode alloc: %v", err) 207 continue 208 } 209 210 allocs = append(allocs, ae.Alloc) 211 } 212 213 return allocs, errs 214 } 215 216 // PutAllocation stores an allocation or returns an error. 217 func (s *BoltStateDB) PutAllocation(alloc *structs.Allocation) error { 218 return s.db.Update(func(tx *boltdd.Tx) error { 219 // Retrieve the root allocations bucket 220 allocsBkt, err := tx.CreateBucketIfNotExists(allocationsBucketName) 221 if err != nil { 222 return err 223 } 224 225 // Retrieve the specific allocations bucket 226 key := []byte(alloc.ID) 227 allocBkt, err := allocsBkt.CreateBucketIfNotExists(key) 228 if err != nil { 229 return err 230 } 231 232 allocState := allocEntry{ 233 Alloc: alloc, 234 } 235 return allocBkt.Put(allocKey, &allocState) 236 }) 237 } 238 239 // deployStatusEntry wraps values for DeploymentStatus keys. 240 type deployStatusEntry struct { 241 DeploymentStatus *structs.AllocDeploymentStatus 242 } 243 244 // PutDeploymentStatus stores an allocation's DeploymentStatus or returns an 245 // error. 246 func (s *BoltStateDB) PutDeploymentStatus(allocID string, ds *structs.AllocDeploymentStatus) error { 247 return s.db.Update(func(tx *boltdd.Tx) error { 248 return putDeploymentStatusImpl(tx, allocID, ds) 249 }) 250 } 251 252 func putDeploymentStatusImpl(tx *boltdd.Tx, allocID string, ds *structs.AllocDeploymentStatus) error { 253 allocBkt, err := getAllocationBucket(tx, allocID) 254 if err != nil { 255 return err 256 } 257 258 entry := deployStatusEntry{ 259 DeploymentStatus: ds, 260 } 261 return allocBkt.Put(allocDeployStatusKey, &entry) 262 } 263 264 // GetDeploymentStatus retrieves an allocation's DeploymentStatus or returns an 265 // error. 266 func (s *BoltStateDB) GetDeploymentStatus(allocID string) (*structs.AllocDeploymentStatus, error) { 267 var entry deployStatusEntry 268 269 err := s.db.View(func(tx *boltdd.Tx) error { 270 allAllocsBkt := tx.Bucket(allocationsBucketName) 271 if allAllocsBkt == nil { 272 // No state, return 273 return nil 274 } 275 276 allocBkt := allAllocsBkt.Bucket([]byte(allocID)) 277 if allocBkt == nil { 278 // No state for alloc, return 279 return nil 280 } 281 282 return allocBkt.Get(allocDeployStatusKey, &entry) 283 }) 284 285 // It's valid for this field to be nil/missing 286 if boltdd.IsErrNotFound(err) { 287 return nil, nil 288 } 289 290 if err != nil { 291 return nil, err 292 } 293 294 return entry.DeploymentStatus, nil 295 } 296 297 // GetTaskRunnerState returns the LocalState and TaskState for a 298 // TaskRunner. LocalState or TaskState will be nil if they do not exist. 299 // 300 // If an error is encountered both LocalState and TaskState will be nil. 301 func (s *BoltStateDB) GetTaskRunnerState(allocID, taskName string) (*trstate.LocalState, *structs.TaskState, error) { 302 var ls *trstate.LocalState 303 var ts *structs.TaskState 304 305 err := s.db.View(func(tx *boltdd.Tx) error { 306 allAllocsBkt := tx.Bucket(allocationsBucketName) 307 if allAllocsBkt == nil { 308 // No state, return 309 return nil 310 } 311 312 allocBkt := allAllocsBkt.Bucket([]byte(allocID)) 313 if allocBkt == nil { 314 // No state for alloc, return 315 return nil 316 } 317 318 taskBkt := allocBkt.Bucket(taskBucketName(taskName)) 319 if taskBkt == nil { 320 // No state for task, return 321 return nil 322 } 323 324 // Restore Local State if it exists 325 ls = &trstate.LocalState{} 326 if err := taskBkt.Get(taskLocalStateKey, ls); err != nil { 327 if !boltdd.IsErrNotFound(err) { 328 return fmt.Errorf("failed to read local task runner state: %v", err) 329 } 330 331 // Key not found, reset ls to nil 332 ls = nil 333 } 334 335 // Restore Task State if it exists 336 ts = &structs.TaskState{} 337 if err := taskBkt.Get(taskStateKey, ts); err != nil { 338 if !boltdd.IsErrNotFound(err) { 339 return fmt.Errorf("failed to read task state: %v", err) 340 } 341 342 // Key not found, reset ts to nil 343 ts = nil 344 } 345 346 return nil 347 }) 348 349 if err != nil { 350 return nil, nil, err 351 } 352 353 return ls, ts, nil 354 } 355 356 // PutTaskRunnerLocalState stores TaskRunner's LocalState or returns an error. 357 func (s *BoltStateDB) PutTaskRunnerLocalState(allocID, taskName string, val *trstate.LocalState) error { 358 return s.db.Update(func(tx *boltdd.Tx) error { 359 return putTaskRunnerLocalStateImpl(tx, allocID, taskName, val) 360 }) 361 } 362 363 // putTaskRunnerLocalStateImpl stores TaskRunner's LocalState in an ongoing 364 // transaction or returns an error. 365 func putTaskRunnerLocalStateImpl(tx *boltdd.Tx, allocID, taskName string, val *trstate.LocalState) error { 366 taskBkt, err := getTaskBucket(tx, allocID, taskName) 367 if err != nil { 368 return fmt.Errorf("failed to retrieve allocation bucket: %v", err) 369 } 370 371 if err := taskBkt.Put(taskLocalStateKey, val); err != nil { 372 return fmt.Errorf("failed to write task_runner state: %v", err) 373 } 374 375 return nil 376 } 377 378 // PutTaskState stores a task's state or returns an error. 379 func (s *BoltStateDB) PutTaskState(allocID, taskName string, state *structs.TaskState) error { 380 return s.db.Update(func(tx *boltdd.Tx) error { 381 return putTaskStateImpl(tx, allocID, taskName, state) 382 }) 383 } 384 385 // putTaskStateImpl stores a task's state in an ongoing transaction or returns 386 // an error. 387 func putTaskStateImpl(tx *boltdd.Tx, allocID, taskName string, state *structs.TaskState) error { 388 taskBkt, err := getTaskBucket(tx, allocID, taskName) 389 if err != nil { 390 return fmt.Errorf("failed to retrieve allocation bucket: %v", err) 391 } 392 393 return taskBkt.Put(taskStateKey, state) 394 } 395 396 // DeleteTaskBucket is used to delete a task bucket if it exists. 397 func (s *BoltStateDB) DeleteTaskBucket(allocID, taskName string) error { 398 return s.db.Update(func(tx *boltdd.Tx) error { 399 // Retrieve the root allocations bucket 400 allocations := tx.Bucket(allocationsBucketName) 401 if allocations == nil { 402 return nil 403 } 404 405 // Retrieve the specific allocations bucket 406 alloc := allocations.Bucket([]byte(allocID)) 407 if alloc == nil { 408 return nil 409 } 410 411 // Check if the bucket exists 412 key := taskBucketName(taskName) 413 return alloc.DeleteBucket(key) 414 }) 415 } 416 417 // DeleteAllocationBucket is used to delete an allocation bucket if it exists. 418 func (s *BoltStateDB) DeleteAllocationBucket(allocID string) error { 419 return s.db.Update(func(tx *boltdd.Tx) error { 420 // Retrieve the root allocations bucket 421 allocations := tx.Bucket(allocationsBucketName) 422 if allocations == nil { 423 return nil 424 } 425 426 key := []byte(allocID) 427 return allocations.DeleteBucket(key) 428 }) 429 } 430 431 // Close releases all database resources and unlocks the database file on disk. 432 // All transactions must be closed before closing the database. 433 func (s *BoltStateDB) Close() error { 434 return s.db.Close() 435 } 436 437 // getAllocationBucket returns the bucket used to persist state about a 438 // particular allocation. If the root allocation bucket or the specific 439 // allocation bucket doesn't exist, it will be created as long as the 440 // transaction is writable. 441 func getAllocationBucket(tx *boltdd.Tx, allocID string) (*boltdd.Bucket, error) { 442 var err error 443 w := tx.Writable() 444 445 // Retrieve the root allocations bucket 446 allocations := tx.Bucket(allocationsBucketName) 447 if allocations == nil { 448 if !w { 449 return nil, fmt.Errorf("Allocations bucket doesn't exist and transaction is not writable") 450 } 451 452 allocations, err = tx.CreateBucketIfNotExists(allocationsBucketName) 453 if err != nil { 454 return nil, err 455 } 456 } 457 458 // Retrieve the specific allocations bucket 459 key := []byte(allocID) 460 alloc := allocations.Bucket(key) 461 if alloc == nil { 462 if !w { 463 return nil, fmt.Errorf("Allocation bucket doesn't exist and transaction is not writable") 464 } 465 466 alloc, err = allocations.CreateBucket(key) 467 if err != nil { 468 return nil, err 469 } 470 } 471 472 return alloc, nil 473 } 474 475 // getTaskBucket returns the bucket used to persist state about a 476 // particular task. If the root allocation bucket, the specific 477 // allocation or task bucket doesn't exist, they will be created as long as the 478 // transaction is writable. 479 func getTaskBucket(tx *boltdd.Tx, allocID, taskName string) (*boltdd.Bucket, error) { 480 alloc, err := getAllocationBucket(tx, allocID) 481 if err != nil { 482 return nil, err 483 } 484 485 // Retrieve the specific task bucket 486 w := tx.Writable() 487 key := taskBucketName(taskName) 488 task := alloc.Bucket(key) 489 if task == nil { 490 if !w { 491 return nil, fmt.Errorf("Task bucket doesn't exist and transaction is not writable") 492 } 493 494 task, err = alloc.CreateBucket(key) 495 if err != nil { 496 return nil, err 497 } 498 } 499 500 return task, nil 501 } 502 503 // PutDevicePluginState stores the device manager's plugin state or returns an 504 // error. 505 func (s *BoltStateDB) PutDevicePluginState(ps *dmstate.PluginState) error { 506 return s.db.Update(func(tx *boltdd.Tx) error { 507 // Retrieve the root device manager bucket 508 devBkt, err := tx.CreateBucketIfNotExists(devManagerBucket) 509 if err != nil { 510 return err 511 } 512 513 return devBkt.Put(managerPluginStateKey, ps) 514 }) 515 } 516 517 // GetDevicePluginState stores the device manager's plugin state or returns an 518 // error. 519 func (s *BoltStateDB) GetDevicePluginState() (*dmstate.PluginState, error) { 520 var ps *dmstate.PluginState 521 522 err := s.db.View(func(tx *boltdd.Tx) error { 523 devBkt := tx.Bucket(devManagerBucket) 524 if devBkt == nil { 525 // No state, return 526 return nil 527 } 528 529 // Restore Plugin State if it exists 530 ps = &dmstate.PluginState{} 531 if err := devBkt.Get(managerPluginStateKey, ps); err != nil { 532 if !boltdd.IsErrNotFound(err) { 533 return fmt.Errorf("failed to read device manager plugin state: %v", err) 534 } 535 536 // Key not found, reset ps to nil 537 ps = nil 538 } 539 540 return nil 541 }) 542 543 if err != nil { 544 return nil, err 545 } 546 547 return ps, nil 548 } 549 550 // PutDriverPluginState stores the driver manager's plugin state or returns an 551 // error. 552 func (s *BoltStateDB) PutDriverPluginState(ps *driverstate.PluginState) error { 553 return s.db.Update(func(tx *boltdd.Tx) error { 554 // Retrieve the root driver manager bucket 555 driverBkt, err := tx.CreateBucketIfNotExists(driverManagerBucket) 556 if err != nil { 557 return err 558 } 559 560 return driverBkt.Put(managerPluginStateKey, ps) 561 }) 562 } 563 564 // GetDriverPluginState stores the driver manager's plugin state or returns an 565 // error. 566 func (s *BoltStateDB) GetDriverPluginState() (*driverstate.PluginState, error) { 567 var ps *driverstate.PluginState 568 569 err := s.db.View(func(tx *boltdd.Tx) error { 570 driverBkt := tx.Bucket(driverManagerBucket) 571 if driverBkt == nil { 572 // No state, return 573 return nil 574 } 575 576 // Restore Plugin State if it exists 577 ps = &driverstate.PluginState{} 578 if err := driverBkt.Get(managerPluginStateKey, ps); err != nil { 579 if !boltdd.IsErrNotFound(err) { 580 return fmt.Errorf("failed to read driver manager plugin state: %v", err) 581 } 582 583 // Key not found, reset ps to nil 584 ps = nil 585 } 586 587 return nil 588 }) 589 590 if err != nil { 591 return nil, err 592 } 593 594 return ps, nil 595 } 596 597 // init initializes metadata entries in a newly created state database. 598 func (s *BoltStateDB) init() error { 599 return s.db.Update(func(tx *boltdd.Tx) error { 600 return addMeta(tx.BoltTx()) 601 }) 602 } 603 604 // Upgrade bolt state db from 0.8 schema to 0.9 schema. Noop if already using 605 // 0.9 schema. Creates a backup before upgrading. 606 func (s *BoltStateDB) Upgrade() error { 607 // Check to see if the underlying DB needs upgrading. 608 upgrade, err := NeedsUpgrade(s.db.BoltDB()) 609 if err != nil { 610 return err 611 } 612 if !upgrade { 613 // No upgrade needed! 614 return nil 615 } 616 617 // Upgraded needed. Backup the boltdb first. 618 backupFileName := filepath.Join(s.stateDir, "state.db.backup") 619 if err := backupDB(s.db.BoltDB(), backupFileName); err != nil { 620 return fmt.Errorf("error backing up state db: %v", err) 621 } 622 623 // Perform the upgrade 624 if err := s.db.Update(func(tx *boltdd.Tx) error { 625 if err := UpgradeAllocs(s.logger, tx); err != nil { 626 return err 627 } 628 629 // Add standard metadata 630 if err := addMeta(tx.BoltTx()); err != nil { 631 return err 632 } 633 634 // Write the time the upgrade was done 635 bkt, err := tx.CreateBucketIfNotExists(metaBucketName) 636 if err != nil { 637 return err 638 } 639 return bkt.Put(metaUpgradedKey, time.Now().Format(time.RFC3339)) 640 }); err != nil { 641 return err 642 } 643 644 s.logger.Info("successfully upgraded state") 645 return nil 646 } 647 648 // DB allows access to the underlying BoltDB for testing purposes. 649 func (s *BoltStateDB) DB() *boltdd.DB { 650 return s.db 651 }