github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/kbfs/libkbfs/rekey_fsm.go (about) 1 // Copyright 2017 Keybase Inc. All rights reserved. 2 // Use of this source code is governed by a BSD 3 // license that can be found in the LICENSE file. 4 5 package libkbfs 6 7 import ( 8 "context" 9 "fmt" 10 "sync" 11 "time" 12 13 "github.com/keybase/client/go/kbfs/data" 14 "github.com/keybase/client/go/kbfs/kbfssync" 15 "github.com/keybase/client/go/kbfs/tlf" 16 "github.com/keybase/client/go/logger" 17 ) 18 19 /* 20 21 This file defines a finite state machine (FSM) for rekey operation scheduling. 22 The state chart is described in following dot graph: 23 24 digraph rekeyFSM { 25 graph [rankdir=LR] 26 start [shape=plaintext] 27 28 Idle -> Idle [label="*"] 29 Scheduled -> Scheduled [label="*"] 30 Started -> Started [label="*"] 31 32 start -> Idle 33 Idle -> Scheduled [label=Request] 34 Scheduled -> Scheduled [label="Request,RekeyNotNeeded"] 35 Scheduled -> Started [label=Timeup] 36 Started -> Scheduled [label="Finished(TTL valid && (rekey done || needs paper))"] 37 Started -> Idle [label="Finished (*)"] 38 } 39 40 */ 41 42 // CtxRekeyTagKey is the type used for unique context tags within an 43 // enqueued Rekey. 44 type CtxRekeyTagKey int 45 46 const ( 47 // CtxRekeyIDKey is the type of the tag for unique operation IDs 48 // within an enqueued Rekey. 49 CtxRekeyIDKey CtxRekeyTagKey = iota 50 ) 51 52 // CtxRekeyOpID is the display name for the unique operation 53 // enqueued rekey ID tag. 54 const CtxRekeyOpID = "REKEYID" 55 56 type rekeyEventType int 57 58 const ( 59 _ rekeyEventType = iota 60 rekeyRequestEvent 61 rekeyFinishedEvent 62 rekeyTimeupEvent 63 rekeyNotNeededEvent 64 rekeyKickoffEvent 65 66 rekeyShutdownEvent 67 68 rekeyCancelEventForTest 69 ) 70 71 func (e rekeyEventType) String() string { 72 switch e { 73 case rekeyRequestEvent: 74 return "rekeyRequestEvent" 75 case rekeyFinishedEvent: 76 return "rekeyFinishedEvent" 77 case rekeyTimeupEvent: 78 return "rekeyTimeupEvent" 79 case rekeyNotNeededEvent: 80 return "rekeyNotNeededEvent" 81 case rekeyShutdownEvent: 82 return "rekeyShutdownEvent" 83 case rekeyKickoffEvent: 84 return "rekeyKickoffEvent" 85 case rekeyCancelEventForTest: 86 return "rekeyCancelEventForTest" 87 default: 88 return "unknown" 89 } 90 } 91 92 // rekeyTask describes a rekey task. 93 type rekeyTask struct { 94 // timeout, if non-nil, causes rekey to fail if it takes more than this 95 // duration since it enters rekeyStateStarted. 96 timeout *time.Duration 97 ttl int 98 promptPaper bool 99 100 ctx *protectedContext 101 } 102 103 // rekeyRequest describes a rekey request. 104 type rekeyRequest struct { 105 // delay is the duration to wait for since the request enters the FSM until 106 // starting the rekey. 107 delay time.Duration 108 rekeyTask 109 } 110 111 // rekeyFinished describes a rekeyFinishedEvent. It contains results from an 112 // actual rekey operation. 113 type rekeyFinished struct { 114 RekeyResult 115 err error 116 } 117 118 // RekeyEvent describes an event to send into the RekeyFSM. A function, e.g., 119 // NewRekeyRequestEvent, should be used to construct one. 120 type RekeyEvent struct { 121 eventType rekeyEventType 122 request *rekeyRequest 123 finished *rekeyFinished 124 } 125 126 func (e RekeyEvent) String() string { 127 switch e.eventType { 128 case rekeyRequestEvent: 129 return fmt.Sprintf("%s [%#+v]", e.eventType, e.request) 130 case rekeyFinishedEvent: 131 return fmt.Sprintf("%s [%#+v]", e.eventType, e.finished) 132 default: 133 return e.eventType.String() 134 } 135 } 136 137 func newRekeyRequestEvent(req rekeyRequest) RekeyEvent { 138 return RekeyEvent{ 139 eventType: rekeyRequestEvent, 140 request: &req, 141 } 142 } 143 144 func newRekeyRequestEventWithContext(ctx context.Context) RekeyEvent { 145 return newRekeyRequestEvent(rekeyRequest{ 146 delay: 0, 147 rekeyTask: rekeyTask{ 148 timeout: nil, 149 promptPaper: false, 150 ttl: rekeyInitialTTL, 151 ctx: newProtectedContext(ctx, nil), 152 }, 153 }) 154 } 155 156 // NewRekeyRequestWithPaperPromptEvent creates a non-delayed rekey request 157 // Event that causes a paper prompt. 158 func NewRekeyRequestWithPaperPromptEvent() RekeyEvent { 159 e := NewRekeyRequestEvent() 160 d := rekeyWithPromptWaitTimeDefault 161 e.request.promptPaper = true 162 e.request.timeout = &d 163 return e 164 } 165 166 // NewRekeyRequestEvent creates a non-delayed rekey request Event. 167 func NewRekeyRequestEvent() RekeyEvent { 168 return newRekeyRequestEventWithContext(CtxWithRandomIDReplayable( 169 context.Background(), CtxRekeyIDKey, CtxRekeyOpID, nil)) 170 } 171 172 // NewRekeyNotNeededEvent creates a rekeyNotNeededEvent typed event. If the FSM 173 // is in rekeyStateScheduled, this causes FSM to unset paperkey prompt. In 174 // other states nothing happens. This event is sent to the FSM when we see a MD 175 // update with rekey flag unset. It can be an indication that an old 176 // outstanding rekey request has been served by another device, or just a 177 // regular rekey updates. 178 func NewRekeyNotNeededEvent() RekeyEvent { 179 return RekeyEvent{ 180 eventType: rekeyNotNeededEvent, 181 } 182 } 183 184 func newRekeyFinishedEvent(res RekeyResult, err error) RekeyEvent { 185 return RekeyEvent{ 186 eventType: rekeyFinishedEvent, 187 finished: &rekeyFinished{ 188 RekeyResult: res, 189 err: err, 190 }, 191 } 192 } 193 194 func newRekeyTimeupEvent() RekeyEvent { 195 return RekeyEvent{ 196 eventType: rekeyTimeupEvent, 197 } 198 } 199 200 func newRekeyShutdownEvent() RekeyEvent { 201 return RekeyEvent{ 202 eventType: rekeyShutdownEvent, 203 } 204 } 205 206 func newRekeyKickoffEvent() RekeyEvent { 207 return RekeyEvent{ 208 eventType: rekeyKickoffEvent, 209 } 210 } 211 212 func newRekeyCancelEventForTest() RekeyEvent { 213 return RekeyEvent{ 214 eventType: rekeyCancelEventForTest, 215 } 216 } 217 218 // rekeyState models a state in the FSM. rekeyFSM keeps exactly one instance of 219 // rekeyState at any given time. 220 type rekeyState interface { 221 // reactToEvent defines how this state reacts to an event. Implementations of 222 // rekeyState should handle necessary transition actions in reactToEvent(), 223 // and return a new rekeyState instance after transition is finished. 224 // rekeyFSM sends event to the rekeyState instance it holds whenever it 225 // receives an event, and use the returned rekeyState instance as new state. 226 // It's OK to return the receiver itself as "new" state. 227 // 228 // rekeyFSM runs an event loop in a dedicated goroutine that calls 229 // reactToEvent and updates states. In other words, it's safe to assume 230 // reactToEvent is only called within the same goroutine, and that it's 231 // impossible that multiple reactToEvent calls are issued concurrently. 232 reactToEvent(event RekeyEvent) rekeyState 233 } 234 235 type rekeyStateIdle struct { 236 fsm *rekeyFSM 237 } 238 239 func newRekeyStateIdle(fsm *rekeyFSM) *rekeyStateIdle { 240 return &rekeyStateIdle{fsm: fsm} 241 } 242 243 func (r *rekeyStateIdle) reactToEvent(event RekeyEvent) rekeyState { 244 switch event.eventType { 245 case rekeyRequestEvent: 246 return newRekeyStateScheduled(r.fsm, 247 event.request.delay, event.request.rekeyTask) 248 default: 249 return r 250 } 251 } 252 253 type rekeyStateScheduled struct { 254 fsm *rekeyFSM 255 256 timer *time.Timer 257 deadline time.Time 258 259 task rekeyTask 260 } 261 262 func newRekeyStateScheduled( 263 fsm *rekeyFSM, delay time.Duration, task rekeyTask) *rekeyStateScheduled { 264 task.ctx.setLogger(fsm.log) 265 return &rekeyStateScheduled{ 266 fsm: fsm, 267 timer: time.AfterFunc(delay, func() { 268 fsm.Event(newRekeyTimeupEvent()) 269 }), 270 deadline: time.Now().Add(delay), 271 task: task, 272 } 273 } 274 275 func (r *rekeyStateScheduled) reactToEvent(event RekeyEvent) rekeyState { 276 switch event.eventType { 277 case rekeyTimeupEvent: 278 // This blocks (which inheritently blocks the entire FSM) if too many 279 // are active. 280 if r.fsm.fbo.config.GetRekeyFSMLimiter().WaitToStart( 281 r.task.ctx.context()) != nil { 282 return r 283 } 284 return newRekeyStateStarted(r.fsm, r.task) 285 case rekeyRequestEvent: 286 if r.task.promptPaper && !event.request.promptPaper { 287 // KBFS-2251: If fbo concludes that paper key would be needed in 288 // order for rekey to proceed, it writes a MD to mdserver with 289 // rekey set at the same time. To prevent the FSM from being kicked 290 // of to rekeyStateStarted right away after receiving this update 291 // (through FoldersNeedRekey) from mdserver, we just reuse the same 292 // timer if r.task.promptPaper is set. 293 // 294 // If the request has promptPaper set, then it's from the KBFS 295 // client, likely due to a read request. In this case, we should 296 // shorten the wait timer according the the request. 297 r.fsm.log.CDebugf(r.task.ctx.context(), "Reusing existing timer "+ 298 "without possibly shortening due to r.task.promptPaper==true") 299 return r 300 } 301 302 task := r.task 303 task.promptPaper = task.promptPaper || event.request.promptPaper 304 if task.timeout == nil { 305 task.timeout = event.request.timeout 306 } 307 task.ttl = event.request.ttl 308 task.ctx.maybeReplaceContext(event.request.ctx.context()) 309 if !r.deadline.After(time.Now().Add(event.request.delay)) { 310 r.fsm.log.CDebugf(task.ctx.context(), "Reusing existing timer") 311 r.task = task 312 return r 313 } 314 r.timer.Stop() 315 return newRekeyStateScheduled(r.fsm, event.request.delay, task) 316 case rekeyNotNeededEvent: 317 // KBFS-2254: if another device finished rekey, we should unset the 318 // paperkey prompt so that if this other device goes offline before a 319 // third device triggers a rekey request, the timer can be preempted. 320 // What if the FoldersNeedRekey call comes in before this and we still 321 // miss the rekey request? Well now we also send a rekey request into 322 // the FSM on MD updates with rekey flag set. Since the MD updates are 323 // applied in order, and that FSM's state transition is 324 // single-goroutined, we are safe here. 325 r.task.promptPaper = false 326 return r 327 case rekeyKickoffEvent: 328 r.timer.Reset(time.Millisecond) 329 return r 330 case rekeyCancelEventForTest: 331 r.timer.Stop() 332 return newRekeyStateIdle(r.fsm) 333 case rekeyShutdownEvent: 334 r.timer.Stop() 335 return r 336 default: 337 return r 338 } 339 } 340 341 type rekeyStateStarted struct { 342 fsm *rekeyFSM 343 task rekeyTask 344 } 345 346 func newRekeyStateStarted(fsm *rekeyFSM, task rekeyTask) *rekeyStateStarted { 347 ctx := task.ctx.context() 348 var cancel context.CancelFunc 349 if task.timeout != nil { 350 ctx, cancel = context.WithTimeout(task.ctx.context(), *task.timeout) 351 } 352 go func() { 353 defer fsm.fbo.config.GetRekeyFSMLimiter().Done() 354 if cancel != nil { 355 defer cancel() 356 } 357 fsm.log.CDebugf(ctx, "Processing rekey for %s", fsm.fbo.folderBranch.Tlf) 358 var res RekeyResult 359 err := fsm.fbo.doMDWriteWithRetryUnlessCanceled(ctx, 360 func(lState *kbfssync.LockState) (err error) { 361 res, err = fsm.fbo.rekeyLocked(ctx, lState, task.promptPaper) 362 return err 363 }) 364 fsm.log.CDebugf(ctx, "Rekey finished with res=%#+v, error=%v", res, err) 365 fsm.Event(newRekeyFinishedEvent(res, err)) 366 }() 367 return &rekeyStateStarted{ 368 fsm: fsm, 369 task: task, 370 } 371 } 372 373 func (r *rekeyStateStarted) reactToEvent(event RekeyEvent) rekeyState { 374 switch event.eventType { 375 case rekeyFinishedEvent: 376 ttl := r.task.ttl - 1 377 r.fsm.log.CDebugf(r.task.ctx.context(), 378 "Rekey finished, ttl: %d -> %d", r.task.ttl, ttl) 379 380 if ttl <= 0 { 381 r.fsm.log.CDebugf(r.task.ctx.context(), 382 "Not scheduling new rekey because TTL expired") 383 return newRekeyStateIdle(r.fsm) 384 } 385 386 switch event.finished.err { 387 case nil: 388 default: 389 r.fsm.log.CDebugf(r.task.ctx.context(), 390 "Rekey errored; scheduling new rekey in %s", rekeyRecheckInterval) 391 return newRekeyStateScheduled(r.fsm, rekeyRecheckInterval, rekeyTask{ 392 timeout: r.task.timeout, 393 promptPaper: r.task.promptPaper, 394 ttl: ttl, 395 ctx: r.task.ctx, 396 }) 397 } 398 399 d := r.fsm.fbo.config.RekeyWithPromptWaitTime() 400 if event.finished.NeedsPaperKey { 401 r.fsm.log.CDebugf(r.task.ctx.context(), 402 "Scheduling rekey due to NeedsPaperKey==true") 403 return newRekeyStateScheduled(r.fsm, d, rekeyTask{ 404 timeout: &d, 405 promptPaper: true, 406 ttl: ttl, 407 ctx: r.task.ctx, 408 }) 409 } 410 411 if event.finished.DidRekey { 412 // We enqueue the rekey here again, in case we missed a device due to a 413 // race condition. This is specifically for the situation where user 414 // provisions two devices in a row, and the key update for the 2nd device 415 // only comes in after rekey for a TLF is done, which didn't include the 416 // second device. At this point, there wouldn't be a new MD with rekey 417 // bit set since it's already set. As a result, the TLF won't get rekeyed 418 // for the second device until the next 1-hour timer triggers another 419 // scan. 420 r.fsm.log.CDebugf(r.task.ctx.context(), 421 "Scheduling rekey (recheck) due to DidRekey==true") 422 return newRekeyStateScheduled(r.fsm, rekeyRecheckInterval, rekeyTask{ 423 timeout: nil, 424 promptPaper: false, 425 ttl: ttl, 426 ctx: r.task.ctx, 427 }) 428 } 429 430 r.fsm.log.CDebugf(r.task.ctx.context(), 431 "Not scheduling rekey because no more rekeys or rechecks are needed") 432 return newRekeyStateIdle(r.fsm) 433 default: 434 return r 435 } 436 } 437 438 type rekeyFSMListener struct { 439 repeatedly bool 440 onEvent func(RekeyEvent) 441 } 442 443 type rekeyFSM struct { 444 shutdownCh chan struct{} 445 reqs chan RekeyEvent 446 447 fbo *folderBranchOps 448 log logger.Logger 449 450 current rekeyState 451 452 muListeners sync.Mutex 453 listeners map[rekeyEventType][]rekeyFSMListener 454 } 455 456 // NewRekeyFSM creates a new rekey FSM. 457 func NewRekeyFSM(fbo *folderBranchOps) RekeyFSM { 458 fsm := &rekeyFSM{ 459 reqs: make(chan RekeyEvent, fbo.config.Mode().RekeyQueueSize()), 460 shutdownCh: make(chan struct{}), 461 fbo: fbo, 462 log: fbo.config.MakeLogger("RekeyFSM"), 463 464 listeners: make(map[rekeyEventType][]rekeyFSMListener), 465 } 466 fsm.current = newRekeyStateIdle(fsm) 467 if fbo.bType == standard { 468 go fsm.loop() 469 } 470 return fsm 471 } 472 473 func (m *rekeyFSM) loop() { 474 reqs := m.reqs 475 for { 476 select { 477 case e := <-reqs: 478 next := m.current.reactToEvent(e) 479 if e.eventType == rekeyShutdownEvent { 480 // Set reqs to nil so on next iteration, we will skip any 481 // content in reqs. So if there are multiple 482 // rekeyShutdownEvent, we won't close m.shutdownCh multiple 483 // times. 484 reqs = nil 485 close(m.shutdownCh) 486 } else { 487 // Only log if we're not shutting down, otherwise `go vet` 488 // yells at us in tests. 489 m.log.Debug("RekeyFSM transition: %T + %s -> %T", 490 m.current, e, next) 491 } 492 m.current = next 493 m.triggerCallbacksForTest(e) 494 495 case <-m.shutdownCh: 496 return 497 } 498 } 499 } 500 501 // Event implements RekeyFSM interface for rekeyFSM. 502 func (m *rekeyFSM) Event(event RekeyEvent) { 503 select { 504 case m.reqs <- event: 505 case <-m.shutdownCh: 506 } 507 } 508 509 // Shutdown implements RekeyFSM interface for rekeyFSM. 510 func (m *rekeyFSM) Shutdown() { 511 m.Event(newRekeyShutdownEvent()) 512 } 513 514 func (m *rekeyFSM) triggerCallbacksForTest(e RekeyEvent) { 515 var cbs []rekeyFSMListener 516 func() { 517 m.muListeners.Lock() 518 defer m.muListeners.Unlock() 519 cbs = m.listeners[e.eventType] 520 m.listeners[e.eventType] = nil 521 for _, cb := range cbs { 522 if cb.repeatedly { 523 m.listeners[e.eventType] = append( 524 m.listeners[e.eventType], cb) 525 } 526 } 527 }() 528 for _, cb := range cbs { 529 cb.onEvent(e) 530 } 531 } 532 533 // listenOnEvent implements RekeyFSM interface for rekeyFSM. 534 func (m *rekeyFSM) listenOnEvent( 535 event rekeyEventType, callback func(RekeyEvent), repeatedly bool) { 536 m.muListeners.Lock() 537 defer m.muListeners.Unlock() 538 m.listeners[event] = append(m.listeners[event], rekeyFSMListener{ 539 onEvent: callback, 540 repeatedly: repeatedly, 541 }) 542 } 543 544 func getRekeyFSM(ctx context.Context, ops KBFSOps, tlfID tlf.ID) RekeyFSM { 545 switch o := ops.(type) { 546 case *KBFSOpsStandard: 547 return o.getOpsNoAdd( 548 ctx, data.FolderBranch{ 549 Tlf: tlfID, 550 Branch: data.MasterBranch, 551 }).rekeyFSM 552 default: 553 panic("unknown KBFSOps") 554 } 555 } 556 557 // RequestRekeyAndWaitForOneFinishEvent sends a rekey request to the FSM 558 // associated with tlfID, and wait for exact one rekeyFinished event. This can 559 // be useful for waiting for a rekey result in tests. 560 // 561 // Note that the supplied ctx is injected to the rekey task, so canceling ctx 562 // would actually cancel the rekey. 563 // 564 // Currently this is only used in tests and RekeyFile. Normal rekey activities 565 // should go through the FSM asychronously. 566 func RequestRekeyAndWaitForOneFinishEvent(ctx context.Context, 567 ops KBFSOps, tlfID tlf.ID) (res RekeyResult, err error) { 568 fsm := getRekeyFSM(ctx, ops, tlfID) 569 rekeyWaiter := make(chan struct{}) 570 fsm.listenOnEvent(rekeyFinishedEvent, func(e RekeyEvent) { 571 res = e.finished.RekeyResult 572 err = e.finished.err 573 close(rekeyWaiter) 574 }, false) 575 fsm.Event(newRekeyRequestEventWithContext(ctx)) 576 <-rekeyWaiter 577 return res, err 578 }