github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/light/client.go (about) 1 package light 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 "sort" 9 "sync" 10 "time" 11 12 "github.com/ari-anchor/sei-tendermint/libs/log" 13 tmmath "github.com/ari-anchor/sei-tendermint/libs/math" 14 "github.com/ari-anchor/sei-tendermint/light/provider" 15 "github.com/ari-anchor/sei-tendermint/light/store" 16 17 "github.com/ari-anchor/sei-tendermint/types" 18 ) 19 20 type mode byte 21 22 const ( 23 sequential mode = iota + 1 24 skipping 25 26 defaultPruningSize = 1000 27 28 // For verifySkipping, we need an algorithm to find what height to check 29 // next to see if it has sufficient validator set overlap. The most 30 // intuitive method is to take the halfway point i.e. if you trusted block 31 // 1 and were not able to verify block 128 then your next try would be 64. 32 // 33 // However, because this implementation caches all the prior results, instead of always taking halfpoints 34 // it is more efficient to re-check cached blocks. Take this simple example. Say 35 // you failed to verify 64 but were able to verify block 32. Following a strict half-way policy, 36 // you would start over again and try verify to block 128. If this failed 37 // then the halfway point between 32 and 128 is 80. But you already have 38 // block 64. Instead of requesting and waiting for another block it is far 39 // better to try again with block 64. This is of course not directly in the 40 // middle. In fact, no matter how the algrorithm plays out, the blocks in 41 // cache are always going to be a little less than the halfway point ( 42 // maximum 1/8 less). To account for this we add a heuristic, bumping the 43 // next height to 9/16 instead of 1/2 44 verifySkippingNumerator = 9 45 verifySkippingDenominator = 16 46 47 // 10s should cover most of the clients. 48 // References: 49 // - http://vancouver-webpages.com/time/web.html 50 // - https://blog.codinghorror.com/keeping-time-on-the-pc/ 51 defaultMaxClockDrift = 10 * time.Second 52 53 // 10s is sufficient for most networks. 54 defaultMaxBlockLag = 10 * time.Second 55 ) 56 57 // Option sets a parameter for the light client. 58 type Option func(*Client) 59 60 // SequentialVerification option configures the light client to sequentially 61 // check the blocks (every block, in ascending height order). Note this is 62 // much slower than SkippingVerification, albeit more secure. 63 func SequentialVerification() Option { 64 return func(c *Client) { c.verificationMode = sequential } 65 } 66 67 // SkippingVerification option configures the light client to skip blocks as 68 // long as {trustLevel} of the old validator set signed the new header. The 69 // verifySkipping algorithm from the specification is used for finding the minimal 70 // "trust path". 71 // 72 // trustLevel - fraction of the old validator set (in terms of voting power), 73 // which must sign the new header in order for us to trust it. NOTE this only 74 // applies to non-adjacent headers. For adjacent headers, sequential 75 // verification is used. 76 func SkippingVerification(trustLevel tmmath.Fraction) Option { 77 return func(c *Client) { 78 c.verificationMode = skipping 79 c.trustLevel = trustLevel 80 } 81 } 82 83 // PruningSize option sets the maximum amount of light blocks that the light 84 // client stores. When Prune() is run, all light blocks that are earlier than 85 // the h amount of light blocks will be removed from the store. 86 // Default: 1000. A pruning size of 0 will not prune the light client at all. 87 func PruningSize(h uint16) Option { 88 return func(c *Client) { c.pruningSize = h } 89 } 90 91 // Logger option can be used to set a logger for the client. 92 func Logger(l log.Logger) Option { 93 return func(c *Client) { c.logger = l } 94 } 95 96 // MaxClockDrift defines how much new header's time can drift into 97 // the future relative to the light clients local time. Default: 10s. 98 func MaxClockDrift(d time.Duration) Option { 99 return func(c *Client) { c.maxClockDrift = d } 100 } 101 102 // MaxBlockLag represents the maximum time difference between the realtime 103 // that a block is received and the timestamp of that block. 104 // One can approximate it to the maximum block production time 105 // 106 // As an example, say the light client received block B at a time 107 // 12:05 (this is the real time) and the time on the block 108 // was 12:00. Then the lag here is 5 minutes. 109 // Default: 10s 110 func MaxBlockLag(d time.Duration) Option { 111 return func(c *Client) { c.maxBlockLag = d } 112 } 113 114 // Client represents a light client, connected to a single chain, which gets 115 // light blocks from a primary provider, verifies them either sequentially or by 116 // skipping some and stores them in a trusted store (usually, a local FS). 117 // 118 // Default verification: SkippingVerification(DefaultTrustLevel) 119 type Client struct { 120 chainID string 121 trustingPeriod time.Duration // see TrustOptions.Period 122 verificationMode mode 123 trustLevel tmmath.Fraction 124 maxClockDrift time.Duration 125 maxBlockLag time.Duration 126 127 // Mutex for locking during changes of the light clients providers 128 providerMutex sync.Mutex 129 // Primary provider of new headers. 130 primary provider.Provider 131 // Providers used to "witness" new headers. 132 witnesses []provider.Provider 133 134 // Where trusted light blocks are stored. 135 trustedStore store.Store 136 // Highest trusted light block from the store (height=H). 137 latestTrustedBlock *types.LightBlock 138 139 // See PruningSize option 140 pruningSize uint16 141 142 logger log.Logger 143 } 144 145 // NewClient returns a new light client. It returns an error if it fails to 146 // obtain the light block from the primary, or they are invalid (e.g. trust 147 // hash does not match with the one from the headers). 148 // 149 // Witnesses are providers, which will be used for cross-checking the primary 150 // provider. At least one witness should be given when skipping verification is 151 // used (default). A verified header is compared with the headers at same height 152 // obtained from the specified witnesses. A witness can become a primary iff the 153 // current primary is unavailable. 154 // 155 // See all Option(s) for the additional configuration. 156 func NewClient( 157 ctx context.Context, 158 chainID string, 159 trustOptions TrustOptions, 160 primary provider.Provider, 161 witnesses []provider.Provider, 162 trustedStore store.Store, 163 options ...Option, 164 ) (*Client, error) { 165 166 // Check whether the trusted store already has a trusted block. If so, then create 167 // a new client from the trusted store instead of the trust options. 168 lastHeight, err := trustedStore.LastLightBlockHeight() 169 if err != nil { 170 return nil, err 171 } 172 if lastHeight > 0 { 173 return NewClientFromTrustedStore( 174 chainID, trustOptions.Period, primary, witnesses, trustedStore, options..., 175 ) 176 } 177 178 // Validate the number of witnesses. 179 if len(witnesses) < 1 { 180 return nil, ErrNoWitnesses 181 } 182 183 // Validate trust options 184 if err := trustOptions.ValidateBasic(); err != nil { 185 return nil, fmt.Errorf("invalid TrustOptions: %w", err) 186 } 187 188 c := &Client{ 189 chainID: chainID, 190 trustingPeriod: trustOptions.Period, 191 verificationMode: skipping, 192 primary: primary, 193 witnesses: witnesses, 194 trustedStore: trustedStore, 195 trustLevel: DefaultTrustLevel, 196 maxClockDrift: defaultMaxClockDrift, 197 maxBlockLag: defaultMaxBlockLag, 198 pruningSize: defaultPruningSize, 199 logger: log.NewNopLogger(), 200 } 201 202 for _, o := range options { 203 o(c) 204 } 205 206 // Validate trust level. 207 if err := ValidateTrustLevel(c.trustLevel); err != nil { 208 return nil, err 209 } 210 211 // Use the trusted hash and height to fetch the first weakly-trusted block 212 // from the primary provider. Assert that all the witnesses have the same block 213 if err := c.initializeWithTrustOptions(ctx, trustOptions); err != nil { 214 return nil, err 215 } 216 217 return c, nil 218 } 219 220 // NewClientFromTrustedStore initializes an existing client from the trusted store. 221 // It does not check that the providers have the same trusted block. 222 func NewClientFromTrustedStore( 223 chainID string, 224 trustingPeriod time.Duration, 225 primary provider.Provider, 226 witnesses []provider.Provider, 227 trustedStore store.Store, 228 options ...Option) (*Client, error) { 229 230 c := &Client{ 231 chainID: chainID, 232 trustingPeriod: trustingPeriod, 233 verificationMode: skipping, 234 trustLevel: DefaultTrustLevel, 235 maxClockDrift: defaultMaxClockDrift, 236 maxBlockLag: defaultMaxBlockLag, 237 primary: primary, 238 witnesses: witnesses, 239 trustedStore: trustedStore, 240 pruningSize: defaultPruningSize, 241 logger: log.NewNopLogger(), 242 } 243 244 for _, o := range options { 245 o(c) 246 } 247 248 // Validate trust level. 249 if err := ValidateTrustLevel(c.trustLevel); err != nil { 250 return nil, err 251 } 252 253 // Check that the trusted store has at least one block and 254 if err := c.restoreTrustedLightBlock(); err != nil { 255 return nil, err 256 } 257 258 return c, nil 259 } 260 261 // restoreTrustedLightBlock loads the latest trusted light block from the store 262 func (c *Client) restoreTrustedLightBlock() error { 263 lastHeight, err := c.trustedStore.LastLightBlockHeight() 264 if err != nil { 265 return fmt.Errorf("can't get last trusted light block height: %w", err) 266 } 267 if lastHeight <= 0 { 268 return errors.New("trusted store is empty") 269 } 270 271 trustedBlock, err := c.trustedStore.LightBlock(lastHeight) 272 if err != nil { 273 return fmt.Errorf("can't get last trusted light block: %w", err) 274 } 275 c.latestTrustedBlock = trustedBlock 276 c.logger.Info("restored trusted light block", "height", lastHeight) 277 278 return nil 279 } 280 281 // initializeWithTrustOptions fetches the weakly-trusted light block from 282 // primary provider, matches it to the trusted hash, and sets it as the 283 // lastTrustedBlock. It then asserts that all witnesses have the same light block. 284 func (c *Client) initializeWithTrustOptions(ctx context.Context, options TrustOptions) error { 285 // 1) Fetch and verify the light block. Note that we do not verify the time of the first block 286 l, err := c.lightBlockFromPrimary(ctx, options.Height) 287 if err != nil { 288 return err 289 } 290 291 // 2) Assert that the hashes match 292 if !bytes.Equal(l.Header.Hash(), options.Hash) { 293 return fmt.Errorf("expected header's hash %X, but got %X", options.Hash, l.Hash()) 294 } 295 296 // 3) Ensure that +2/3 of validators signed correctly. This also sanity checks that the 297 // chain ID is the same. 298 err = l.ValidatorSet.VerifyCommitLight(c.chainID, l.Commit.BlockID, l.Height, l.Commit) 299 if err != nil { 300 return fmt.Errorf("invalid commit: %w", err) 301 } 302 303 // 4) Cross-verify with witnesses to ensure everybody has the same state. 304 if err := c.compareFirstHeaderWithWitnesses(ctx, l.SignedHeader); err != nil { 305 return err 306 } 307 308 // 5) Persist both of them and continue. 309 return c.updateTrustedLightBlock(l) 310 } 311 312 // TrustedLightBlock returns a trusted light block at the given height (0 - the latest). 313 // 314 // It returns an error if: 315 // - there are some issues with the trusted store, although that should not 316 // happen normally; 317 // - negative height is passed; 318 // - header has not been verified yet and is therefore not in the store 319 // 320 // Safe for concurrent use by multiple goroutines. 321 func (c *Client) TrustedLightBlock(height int64) (*types.LightBlock, error) { 322 height, err := c.compareWithLatestHeight(height) 323 if err != nil { 324 return nil, err 325 } 326 return c.trustedStore.LightBlock(height) 327 } 328 329 func (c *Client) compareWithLatestHeight(height int64) (int64, error) { 330 latestHeight, err := c.LastTrustedHeight() 331 if err != nil { 332 return 0, fmt.Errorf("can't get last trusted height: %w", err) 333 } 334 if latestHeight == -1 { 335 return 0, errors.New("no headers exist") 336 } 337 338 switch { 339 case height > latestHeight: 340 return 0, fmt.Errorf("unverified header/valset requested (latest: %d)", latestHeight) 341 case height == 0: 342 return latestHeight, nil 343 case height < 0: 344 return 0, errors.New("negative height") 345 } 346 347 return height, nil 348 } 349 350 // Update attempts to advance the state by downloading the latest light 351 // block and verifying it. It returns a new light block on a successful 352 // update. Otherwise, it returns nil (plus an error, if any). 353 func (c *Client) Update(ctx context.Context, now time.Time) (*types.LightBlock, error) { 354 lastTrustedHeight, err := c.LastTrustedHeight() 355 if err != nil { 356 return nil, fmt.Errorf("can't get last trusted height: %w", err) 357 } 358 359 if lastTrustedHeight == -1 { 360 // no light blocks yet => wait 361 return nil, nil 362 } 363 364 latestBlock, err := c.lightBlockFromPrimary(ctx, 0) 365 if err != nil { 366 return nil, err 367 } 368 369 // If there is a new light block then verify it 370 if latestBlock.Height > lastTrustedHeight { 371 err = c.verifyLightBlock(ctx, latestBlock, now) 372 if err != nil { 373 return nil, err 374 } 375 c.logger.Info("advanced to new state", "height", latestBlock.Height, "hash", latestBlock.Hash()) 376 return latestBlock, nil 377 } 378 379 // else return the latestTrustedBlock 380 return c.latestTrustedBlock, nil 381 } 382 383 // VerifyLightBlockAtHeight fetches the light block at the given height 384 // and verifies it. It returns the block immediately if it exists in 385 // the trustedStore (no verification is needed). 386 // 387 // height must be > 0. 388 // 389 // It returns provider.ErrlightBlockNotFound if light block is not found by 390 // primary. 391 // 392 // It will replace the primary provider if an error from a request to the provider occurs 393 func (c *Client) VerifyLightBlockAtHeight(ctx context.Context, height int64, now time.Time) (*types.LightBlock, error) { 394 if height <= 0 { 395 return nil, errors.New("negative or zero height") 396 } 397 398 // Check if the light block is already verified. 399 h, err := c.TrustedLightBlock(height) 400 if err == nil { 401 c.logger.Debug("header has already been verified", "height", height, "hash", h.Hash()) 402 // Return already trusted light block 403 return h, nil 404 } 405 406 // Request the light block from primary 407 l, err := c.lightBlockFromPrimary(ctx, height) 408 if err != nil { 409 return nil, err 410 } 411 412 return l, c.verifyLightBlock(ctx, l, now) 413 } 414 415 // VerifyHeader verifies a new header against the trusted state. It returns 416 // immediately if newHeader exists in trustedStore (no verification is 417 // needed). Else it performs one of the two types of verification: 418 // 419 // SequentialVerification: verifies that 2/3 of the trusted validator set has 420 // signed the new header. If the headers are not adjacent, **all** intermediate 421 // headers will be requested. Intermediate headers are not saved to database. 422 // 423 // SkippingVerification(trustLevel): verifies that {trustLevel} of the trusted 424 // validator set has signed the new header. If it's not the case and the 425 // headers are not adjacent, verifySkipping is performed and necessary (not all) 426 // intermediate headers will be requested. See the specification for details. 427 // Intermediate headers are not saved to database. 428 // https://github.com/ari-anchor/sei-tendermint/blob/master/spec/light-client/README.md 429 // 430 // If the header, which is older than the currently trusted header, is 431 // requested and the light client does not have it, VerifyHeader will perform: 432 // 433 // a) verifySkipping verification if nearest trusted header is found & not expired 434 // b) backwards verification in all other cases 435 // 436 // It returns ErrOldHeaderExpired if the latest trusted header expired. 437 // 438 // If the primary provides an invalid header (ErrInvalidHeader), it is rejected 439 // and replaced by another provider until all are exhausted. 440 // 441 // If, at any moment, a LightBlock is not found by the primary provider as part of 442 // verification then the provider will be replaced by another and the process will 443 // restart. 444 func (c *Client) VerifyHeader(ctx context.Context, newHeader *types.Header, now time.Time) error { 445 if newHeader == nil { 446 return errors.New("nil header") 447 } 448 if newHeader.Height <= 0 { 449 return errors.New("negative or zero height") 450 } 451 452 // Check if newHeader already verified. 453 l, err := c.TrustedLightBlock(newHeader.Height) 454 if err == nil { 455 // Make sure it's the same header. 456 if !bytes.Equal(l.Hash(), newHeader.Hash()) { 457 return fmt.Errorf("existing trusted header %X does not match newHeader %X", l.Hash(), newHeader.Hash()) 458 } 459 c.logger.Debug("header has already been verified", 460 "height", newHeader.Height, "hash", newHeader.Hash()) 461 return nil 462 } 463 464 // Request the header and the vals. 465 l, err = c.lightBlockFromPrimary(ctx, newHeader.Height) 466 if err != nil { 467 return fmt.Errorf("failed to retrieve light block from primary to verify against: %w", err) 468 } 469 470 if !bytes.Equal(l.Hash(), newHeader.Hash()) { 471 return fmt.Errorf("header from primary %X does not match newHeader %X", l.Hash(), newHeader.Hash()) 472 } 473 474 return c.verifyLightBlock(ctx, l, now) 475 } 476 477 func (c *Client) verifyLightBlock(ctx context.Context, newLightBlock *types.LightBlock, now time.Time) error { 478 c.logger.Info("verify light block", "height", newLightBlock.Height, "hash", newLightBlock.Hash()) 479 480 var ( 481 verifyFunc func(ctx context.Context, trusted *types.LightBlock, new *types.LightBlock, now time.Time) error 482 err error 483 ) 484 485 switch c.verificationMode { 486 case sequential: 487 verifyFunc = c.verifySequential 488 case skipping: 489 verifyFunc = c.verifySkippingAgainstPrimary 490 default: 491 panic(fmt.Sprintf("Unknown verification mode: %b", c.verificationMode)) 492 } 493 494 firstBlockHeight, err := c.FirstTrustedHeight() 495 if err != nil { 496 return fmt.Errorf("can't get first light block height: %w", err) 497 } 498 499 switch { 500 // Verifying forwards 501 case newLightBlock.Height >= c.latestTrustedBlock.Height: 502 err = verifyFunc(ctx, c.latestTrustedBlock, newLightBlock, now) 503 504 // Verifying backwards 505 case newLightBlock.Height < firstBlockHeight: 506 var firstBlock *types.LightBlock 507 firstBlock, err = c.trustedStore.LightBlock(firstBlockHeight) 508 if err != nil { 509 return fmt.Errorf("can't get first light block: %w", err) 510 } 511 err = c.backwards(ctx, firstBlock.Header, newLightBlock.Header) 512 513 // Verifying between first and last trusted light block. In this situation 514 // we find the closest block prior to the target height then perform 515 // verification forwards. 516 default: 517 var closestBlock *types.LightBlock 518 closestBlock, err = c.trustedStore.LightBlockBefore(newLightBlock.Height) 519 if err != nil { 520 return fmt.Errorf("can't get signed header before height %d: %w", newLightBlock.Height, err) 521 } 522 err = verifyFunc(ctx, closestBlock, newLightBlock, now) 523 } 524 if err != nil { 525 c.logger.Error("failed to verify", "err", err) 526 return err 527 } 528 529 // Once verified, save and return 530 return c.updateTrustedLightBlock(newLightBlock) 531 } 532 533 // see VerifyHeader 534 func (c *Client) verifySequential( 535 ctx context.Context, 536 trustedBlock *types.LightBlock, 537 newLightBlock *types.LightBlock, 538 now time.Time) error { 539 540 var ( 541 verifiedBlock = trustedBlock 542 interimBlock *types.LightBlock 543 err error 544 trace = []*types.LightBlock{trustedBlock} 545 ) 546 547 for height := trustedBlock.Height + 1; height <= newLightBlock.Height; height++ { 548 // 1) Fetch interim light block if needed. 549 if height == newLightBlock.Height { // last light block 550 interimBlock = newLightBlock 551 } else { // intermediate light blocks 552 interimBlock, err = c.lightBlockFromPrimary(ctx, height) 553 if err != nil { 554 return ErrVerificationFailed{From: verifiedBlock.Height, To: height, Reason: err} 555 } 556 } 557 558 // 2) Verify them 559 c.logger.Debug("verify adjacent newLightBlock against verifiedBlock", 560 "trustedHeight", verifiedBlock.Height, 561 "trustedHash", verifiedBlock.Hash(), 562 "newHeight", interimBlock.Height, 563 "newHash", interimBlock.Hash()) 564 565 err = VerifyAdjacent(verifiedBlock.SignedHeader, interimBlock.SignedHeader, interimBlock.ValidatorSet, 566 c.trustingPeriod, now, c.maxClockDrift) 567 if err != nil { 568 err := ErrVerificationFailed{From: verifiedBlock.Height, To: interimBlock.Height, Reason: err} 569 570 switch errors.Unwrap(err).(type) { 571 case ErrInvalidHeader: 572 // If the target header is invalid, return immediately. 573 if err.To == newLightBlock.Height { 574 c.logger.Debug("target header is invalid", "err", err) 575 return err 576 } 577 578 // If some intermediate header is invalid, remove the primary and try again. 579 c.logger.Info("primary sent invalid header -> removing", "err", err, "primary", c.primary) 580 581 replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true) 582 if removeErr != nil { 583 c.logger.Debug("failed to replace primary. Returning original error", "err", removeErr) 584 return err 585 } 586 587 if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) { 588 c.logger.Debug("replaced primary but new primary has a different block to the initial one") 589 return err 590 } 591 592 // attempt to verify header again 593 height-- 594 595 continue 596 default: 597 return err 598 } 599 } 600 601 // 3) Update verifiedBlock 602 verifiedBlock = interimBlock 603 604 // 4) Add verifiedBlock to trace 605 trace = append(trace, verifiedBlock) 606 } 607 608 // Compare header with the witnesses to ensure it's not a fork. 609 // More witnesses we have, more chance to notice one. 610 // 611 // CORRECTNESS ASSUMPTION: there's at least 1 correct full node 612 // (primary or one of the witnesses). 613 return c.detectDivergence(ctx, trace, now) 614 } 615 616 // see VerifyHeader 617 // 618 // verifySkipping finds the middle light block between a trusted and new light block, 619 // reiterating the action until it verifies a light block. A cache of light blocks 620 // requested from source is kept such that when a verification is made, and the 621 // light client tries again to verify the new light block in the middle, the light 622 // client does not need to ask for all the same light blocks again. 623 // 624 // If this function errors, it should always wrap it in a `ErrVerifcationFailed` 625 // struct so that the calling function can determine where it failed and handle 626 // it accordingly. 627 func (c *Client) verifySkipping( 628 ctx context.Context, 629 source provider.Provider, 630 trustedBlock *types.LightBlock, 631 newLightBlock *types.LightBlock, 632 now time.Time) ([]*types.LightBlock, error) { 633 634 var ( 635 // The block cache is ordered in height from highest to lowest. We start 636 // with the newLightBlock and for any height requested in between we add 637 // it. 638 blockCache = []*types.LightBlock{newLightBlock} 639 depth = 0 640 641 verifiedBlock = trustedBlock 642 trace = []*types.LightBlock{trustedBlock} 643 ) 644 645 for { 646 c.logger.Debug("verify non-adjacent newHeader against verifiedBlock", 647 "trustedHeight", verifiedBlock.Height, 648 "trustedHash", verifiedBlock.Hash(), 649 "newHeight", blockCache[depth].Height, 650 "newHash", blockCache[depth].Hash()) 651 652 // Verify the untrusted header. This function is equivalent to 653 // ValidAndVerified in the spec 654 err := Verify(verifiedBlock.SignedHeader, verifiedBlock.ValidatorSet, blockCache[depth].SignedHeader, 655 blockCache[depth].ValidatorSet, c.trustingPeriod, now, c.maxClockDrift, c.trustLevel) 656 switch err.(type) { 657 case nil: 658 // If we have verified the last header then depth will be 0 and we 659 // can return a success along with the trace of intermediate headers 660 if depth == 0 { 661 trace = append(trace, newLightBlock) 662 return trace, nil 663 } 664 // If not, update the lower bound to the previous upper bound 665 verifiedBlock = blockCache[depth] 666 // Remove the light block at the lower bound in the header cache - it will no longer be needed 667 blockCache = blockCache[:depth] 668 // Reset the cache depth so that we start from the upper bound again 669 depth = 0 670 // add verifiedBlock to the trace 671 trace = append(trace, verifiedBlock) 672 673 case ErrNewValSetCantBeTrusted: 674 // the light block current passed validation, but the validator 675 // set is too different to verify it. We keep the block because it 676 // may become valuable later on. 677 // 678 // If we have reached the end of the cache we need to request a 679 // completely new block else we recycle a previously requested one. 680 // In both cases we are taking a block with a closer height to the 681 // previously verified one in the hope that it has a better chance 682 // of having a similar validator set 683 if depth == len(blockCache)-1 { 684 // schedule what the next height we need to fetch is 685 pivotHeight := c.schedule(verifiedBlock.Height, blockCache[depth].Height) 686 interimBlock, providerErr := c.getLightBlock(ctx, source, pivotHeight) 687 if providerErr != nil { 688 return nil, ErrVerificationFailed{From: verifiedBlock.Height, To: pivotHeight, Reason: providerErr} 689 } 690 blockCache = append(blockCache, interimBlock) 691 } 692 depth++ 693 694 // for any verification error we abort the operation and return the error 695 default: 696 return nil, ErrVerificationFailed{From: verifiedBlock.Height, To: blockCache[depth].Height, Reason: err} 697 } 698 } 699 } 700 701 // schedule works out the next height to attempt sequential verification 702 func (c *Client) schedule(lastVerifiedHeight, lastFailedHeight int64) int64 { 703 return lastVerifiedHeight + 704 (lastFailedHeight-lastVerifiedHeight)*verifySkippingNumerator/verifySkippingDenominator 705 } 706 707 // verifySkippingAgainstPrimary does verifySkipping plus it compares new header with 708 // witnesses and replaces primary if it sends the light client an invalid header 709 func (c *Client) verifySkippingAgainstPrimary( 710 ctx context.Context, 711 trustedBlock *types.LightBlock, 712 newLightBlock *types.LightBlock, 713 now time.Time) error { 714 715 trace, err := c.verifySkipping(ctx, c.primary, trustedBlock, newLightBlock, now) 716 if err == nil { 717 // Success! Now compare the header with the witnesses to ensure it's not a fork. 718 // More witnesses we have, more chance to notice one. 719 // 720 // CORRECTNESS ASSUMPTION: there's at least 1 correct full node 721 // (primary or one of the witnesses). 722 if cmpErr := c.detectDivergence(ctx, trace, now); cmpErr != nil { 723 return cmpErr 724 } 725 } 726 727 var e = &ErrVerificationFailed{} 728 // all errors from verify skipping should be `ErrVerificationFailed` 729 // if it's not we just return the error directly 730 if !errors.As(err, e) { 731 return err 732 } 733 734 replace := true 735 switch e.Reason.(type) { 736 // Verification returned an invalid header 737 case ErrInvalidHeader: 738 // If it was the target header, return immediately. 739 if e.To == newLightBlock.Height { 740 c.logger.Debug("target header is invalid", "err", err) 741 return err 742 } 743 744 // If some intermediate header is invalid, remove the primary and try 745 // again. 746 747 // An intermediate header expired. We can no longer validate it as there is 748 // no longer the ability to punish invalid blocks as evidence of misbehavior 749 case ErrOldHeaderExpired: 750 return err 751 752 // This happens if there was a problem in finding the next block or a 753 // context was canceled. 754 default: 755 if errors.Is(e.Reason, context.Canceled) || errors.Is(e.Reason, context.DeadlineExceeded) { 756 return e.Reason 757 } 758 759 if !c.providerShouldBeRemoved(e.Reason) { 760 replace = false 761 } 762 } 763 764 // if we've reached here we're attempting to retry verification with a 765 // different provider 766 c.logger.Info("primary returned error", "err", e, "primary", c.primary, "replace", replace) 767 768 replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, replace) 769 if removeErr != nil { 770 c.logger.Error("failed to replace primary. Returning original error", "err", removeErr) 771 return e.Reason 772 } 773 774 if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) { 775 c.logger.Debug("replaced primary but new primary has a different block to the initial one. Returning original error") 776 return e.Reason 777 } 778 779 // attempt to verify the header again from the trusted block 780 return c.verifySkippingAgainstPrimary(ctx, trustedBlock, replacementBlock, now) 781 } 782 783 // LastTrustedHeight returns a last trusted height. -1 and nil are returned if 784 // there are no trusted headers. 785 // 786 // Safe for concurrent use by multiple goroutines. 787 func (c *Client) LastTrustedHeight() (int64, error) { 788 return c.trustedStore.LastLightBlockHeight() 789 } 790 791 // FirstTrustedHeight returns a first trusted height. -1 and nil are returned if 792 // there are no trusted headers. 793 // 794 // Safe for concurrent use by multiple goroutines. 795 func (c *Client) FirstTrustedHeight() (int64, error) { 796 return c.trustedStore.FirstLightBlockHeight() 797 } 798 799 // ChainID returns the chain ID the light client was configured with. 800 // 801 // Safe for concurrent use by multiple goroutines. 802 func (c *Client) ChainID() string { 803 return c.chainID 804 } 805 806 // Primary returns the primary provider. 807 // 808 // NOTE: provider may be not safe for concurrent access. 809 func (c *Client) Primary() provider.Provider { 810 c.providerMutex.Lock() 811 defer c.providerMutex.Unlock() 812 return c.primary 813 } 814 815 // Witnesses returns the witness providers. 816 // 817 // NOTE: providers may be not safe for concurrent access. 818 func (c *Client) Witnesses() []provider.Provider { 819 c.providerMutex.Lock() 820 defer c.providerMutex.Unlock() 821 return c.witnesses 822 } 823 824 // AddProvider adds a providers to the light clients set 825 // 826 // NOTE: The light client does not check for uniqueness 827 func (c *Client) AddProvider(p provider.Provider) { 828 c.providerMutex.Lock() 829 defer c.providerMutex.Unlock() 830 c.witnesses = append(c.witnesses, p) 831 } 832 833 // Cleanup removes all the data (headers and validator sets) stored. Note: the 834 // client must be stopped at this point. 835 func (c *Client) Cleanup() error { 836 c.logger.Info("removing all light blocks") 837 c.latestTrustedBlock = nil 838 return c.trustedStore.Prune(0) 839 } 840 841 func (c *Client) updateTrustedLightBlock(l *types.LightBlock) error { 842 c.logger.Debug("updating trusted light block", "light_block", l) 843 844 if err := c.trustedStore.SaveLightBlock(l); err != nil { 845 return fmt.Errorf("failed to save trusted header: %w", err) 846 } 847 848 if c.pruningSize > 0 { 849 if err := c.trustedStore.Prune(c.pruningSize); err != nil { 850 return fmt.Errorf("prune: %w", err) 851 } 852 } 853 854 if c.latestTrustedBlock == nil || l.Height > c.latestTrustedBlock.Height { 855 c.latestTrustedBlock = l 856 } 857 858 return nil 859 } 860 861 // backwards verification (see VerifyHeaderBackwards func in the spec) verifies 862 // headers before a trusted header. If a sent header is invalid the primary is 863 // replaced with another provider and the operation is repeated. 864 func (c *Client) backwards( 865 ctx context.Context, 866 trustedHeader *types.Header, 867 newHeader *types.Header) error { 868 869 var ( 870 verifiedHeader = trustedHeader 871 interimHeader *types.Header 872 ) 873 874 c.logger.Info(fmt.Sprintf("starting the backward verification process from %d to %d", newHeader.Height, verifiedHeader.Height)) 875 for verifiedHeader.Height > newHeader.Height { 876 interimBlock, err := c.lightBlockFromPrimary(ctx, verifiedHeader.Height-1) 877 if err != nil { 878 return fmt.Errorf("failed to obtain the header at height #%d: %w", verifiedHeader.Height-1, err) 879 } 880 interimHeader = interimBlock.Header 881 c.logger.Debug("verify newHeader against verifiedHeader", 882 "trustedHeight", verifiedHeader.Height, 883 "trustedHash", verifiedHeader.Hash(), 884 "newHeight", interimHeader.Height, 885 "newHash", interimHeader.Hash()) 886 if err := VerifyBackwards(interimHeader, verifiedHeader); err != nil { 887 // verification has failed 888 c.logger.Info("backwards verification failed, replacing primary...", "err", err, "primary", c.primary) 889 890 // the client tries to see if it can get a witness to continue with the request 891 newPrimarysBlock, replaceErr := c.findNewPrimary(ctx, newHeader.Height, true) 892 if replaceErr != nil { 893 c.logger.Debug("failed to replace primary. Returning original error", "err", replaceErr) 894 return err 895 } 896 897 // before continuing we must check that they have the same target header to validate 898 if !bytes.Equal(newPrimarysBlock.Hash(), newHeader.Hash()) { 899 c.logger.Debug("replaced primary but new primary has a different block to the initial one") 900 // return the original error 901 return err 902 } 903 904 // try again with the new primary 905 return c.backwards(ctx, verifiedHeader, newPrimarysBlock.Header) 906 } 907 verifiedHeader = interimHeader 908 } 909 910 return nil 911 } 912 913 // lightBlockFromPrimary retrieves the lightBlock from the primary provider 914 // at the specified height. This method also handles provider behavior as follows: 915 // 916 // 1. If the provider does not respond or does not have the block, it tries again 917 // with a different provider 918 // 2. If all providers return the same error, the light client forwards the error to 919 // where the initial request came from 920 // 3. If the provider provides an invalid light block, is deemed unreliable or returns 921 // any other error, the primary is permanently dropped and is replaced by a witness. 922 func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*types.LightBlock, error) { 923 c.providerMutex.Lock() 924 l, err := c.getLightBlock(ctx, c.primary, height) 925 c.providerMutex.Unlock() 926 927 switch err { 928 case nil: 929 // Everything went smoothly. We reset the lightBlockRequests and return the light block 930 return l, nil 931 932 // catch canceled contexts or deadlines 933 case context.Canceled, context.DeadlineExceeded: 934 return nil, err 935 936 case provider.ErrNoResponse, provider.ErrLightBlockNotFound, provider.ErrHeightTooHigh: 937 // we find a new witness to replace the primary 938 c.logger.Info("error from light block request from primary, replacing...", 939 "error", err, "height", height, "primary", c.primary) 940 return c.findNewPrimary(ctx, height, false) 941 942 default: 943 // The light client has most likely received either provider.ErrUnreliableProvider or provider.ErrBadLightBlock 944 // These errors mean that the light client should drop the primary and try with another provider instead 945 c.logger.Info("error from light block request from primary, removing...", 946 "error", err, "height", height, "primary", c.primary) 947 return c.findNewPrimary(ctx, height, true) 948 } 949 } 950 951 func (c *Client) getLightBlock(ctx context.Context, p provider.Provider, height int64) (*types.LightBlock, error) { 952 l, err := p.LightBlock(ctx, height) 953 if ctx.Err() != nil { 954 return nil, provider.ErrNoResponse 955 } 956 return l, err 957 } 958 959 // NOTE: requires a providerMutex lock 960 func (c *Client) removeWitnesses(indexes []int) error { 961 if len(c.witnesses) <= len(indexes) { 962 return ErrNoWitnesses 963 } 964 965 // we need to make sure that we remove witnesses by index in the reverse 966 // order so as to not affect the indexes themselves 967 sort.Ints(indexes) 968 for i := len(indexes) - 1; i >= 0; i-- { 969 c.witnesses[indexes[i]] = c.witnesses[len(c.witnesses)-1] 970 c.witnesses = c.witnesses[:len(c.witnesses)-1] 971 } 972 973 return nil 974 } 975 976 type witnessResponse struct { 977 lb *types.LightBlock 978 witnessIndex int 979 err error 980 } 981 982 // findNewPrimary concurrently sends a light block request, promoting the first witness to return 983 // a valid light block as the new primary. The remove option indicates whether the primary should be 984 // entire removed or just appended to the back of the witnesses list. This method also handles witness 985 // errors. If no witness is available, it returns the last error of the witness. 986 func (c *Client) findNewPrimary(ctx context.Context, height int64, remove bool) (*types.LightBlock, error) { 987 c.providerMutex.Lock() 988 defer c.providerMutex.Unlock() 989 990 if len(c.witnesses) < 1 { 991 return nil, ErrNoWitnesses 992 } 993 994 var ( 995 witnessResponsesC = make(chan witnessResponse, len(c.witnesses)) 996 witnessesToRemove []int 997 lastError error 998 wg sync.WaitGroup 999 ) 1000 1001 ctx, cancel := context.WithCancel(ctx) 1002 defer cancel() 1003 // send out a light block request to all witnesses 1004 for index := range c.witnesses { 1005 wg.Add(1) 1006 go func(witnessIndex int, witnessResponsesC chan witnessResponse) { 1007 defer wg.Done() 1008 1009 lb, err := c.witnesses[witnessIndex].LightBlock(ctx, height) 1010 select { 1011 case witnessResponsesC <- witnessResponse{lb, witnessIndex, err}: 1012 case <-ctx.Done(): 1013 } 1014 1015 }(index, witnessResponsesC) 1016 } 1017 1018 // process all the responses as they come in 1019 for i := 0; i < cap(witnessResponsesC); i++ { 1020 response := <-witnessResponsesC 1021 switch response.err { 1022 // success! We have found a new primary 1023 case nil: 1024 cancel() // cancel all remaining requests to other witnesses 1025 1026 wg.Wait() // wait for all goroutines to finish 1027 1028 // if we are not intending on removing the primary then append the old primary to the end of the witness slice 1029 if !remove { 1030 c.witnesses = append(c.witnesses, c.primary) 1031 } 1032 1033 // promote respondent as the new primary 1034 c.logger.Debug("found new primary", "primary", c.witnesses[response.witnessIndex]) 1035 c.primary = c.witnesses[response.witnessIndex] 1036 1037 // add promoted witness to the list of witnesses to be removed 1038 witnessesToRemove = append(witnessesToRemove, response.witnessIndex) 1039 1040 // remove witnesses marked as bad (the client must do this before we alter the witness slice and change the indexes 1041 // of witnesses). Removal is done in descending order 1042 if err := c.removeWitnesses(witnessesToRemove); err != nil { 1043 return nil, err 1044 } 1045 1046 // return the light block that new primary responded with 1047 return response.lb, nil 1048 1049 // catch canceled contexts or deadlines 1050 case context.Canceled, context.DeadlineExceeded: 1051 return nil, response.err 1052 1053 // process benign errors by logging them only 1054 case provider.ErrNoResponse, provider.ErrLightBlockNotFound, provider.ErrHeightTooHigh: 1055 lastError = response.err 1056 c.logger.Info("error on light block request from witness", 1057 "error", response.err, "primary", c.witnesses[response.witnessIndex]) 1058 continue 1059 1060 // process malevolent errors like ErrUnreliableProvider and ErrBadLightBlock by removing the witness 1061 default: 1062 lastError = response.err 1063 c.logger.Error("error on light block request from witness, removing...", 1064 "error", response.err, "primary", c.witnesses[response.witnessIndex]) 1065 witnessesToRemove = append(witnessesToRemove, response.witnessIndex) 1066 } 1067 } 1068 1069 return nil, lastError 1070 } 1071 1072 // compareFirstHeaderWithWitnesses concurrently compares h with all witnesses. If any 1073 // witness reports a different header than h, the function returns an error. 1074 func (c *Client) compareFirstHeaderWithWitnesses(ctx context.Context, h *types.SignedHeader) error { 1075 compareCtx, cancel := context.WithCancel(ctx) 1076 defer cancel() 1077 1078 c.providerMutex.Lock() 1079 defer c.providerMutex.Unlock() 1080 1081 if len(c.witnesses) < 1 { 1082 return ErrNoWitnesses 1083 } 1084 1085 errc := make(chan error, len(c.witnesses)) 1086 for i, witness := range c.witnesses { 1087 go c.compareNewHeaderWithWitness(compareCtx, errc, h, witness, i) 1088 } 1089 1090 witnessesToRemove := make([]int, 0, len(c.witnesses)) 1091 1092 // handle errors from the header comparisons as they come in 1093 for i := 0; i < cap(errc); i++ { 1094 err := <-errc 1095 1096 switch e := err.(type) { 1097 case nil: 1098 continue 1099 case errConflictingHeaders: 1100 c.logger.Error(`witness has a different header. Please check primary is 1101 correct and remove witness. Otherwise, use a different primary`, 1102 "Witness", c.witnesses[e.WitnessIndex], "ExpHeader", h.Hash(), "GotHeader", e.Block.Hash()) 1103 return err 1104 case errBadWitness: 1105 // If witness sent us an invalid header, then remove it 1106 c.logger.Info("witness returned an error, removing...", 1107 "err", err) 1108 witnessesToRemove = append(witnessesToRemove, e.WitnessIndex) 1109 default: 1110 // check for canceled contexts or deadlines 1111 if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { 1112 return err 1113 } 1114 1115 // the witness either didn't respond or didn't have the block. We ignore it. 1116 c.logger.Debug("unable to compare first header with witness, ignoring", 1117 "err", err) 1118 } 1119 1120 } 1121 1122 // remove all witnesses that misbehaved 1123 return c.removeWitnesses(witnessesToRemove) 1124 } 1125 1126 // providerShouldBeRemoved analyzes the nature of the error and whether the provider 1127 // should be removed from the light clients set 1128 func (c *Client) providerShouldBeRemoved(err error) bool { 1129 return errors.As(err, &provider.ErrUnreliableProvider{}) || 1130 errors.As(err, &provider.ErrBadLightBlock{}) || 1131 errors.Is(err, provider.ErrConnectionClosed) 1132 } 1133 1134 func (c *Client) Status(ctx context.Context) *types.LightClientInfo { 1135 chunks := make([]string, len(c.witnesses)) 1136 1137 // If primary is in witness list we do not want to count it twice in the number of peers 1138 primaryNotInWitnessList := 1 1139 for i, val := range c.witnesses { 1140 chunks[i] = val.ID() 1141 if chunks[i] == c.primary.ID() { 1142 primaryNotInWitnessList = 0 1143 } 1144 } 1145 1146 return &types.LightClientInfo{ 1147 PrimaryID: c.primary.ID(), 1148 WitnessesID: chunks, 1149 NumPeers: len(chunks) + primaryNotInWitnessList, 1150 LastTrustedHeight: c.latestTrustedBlock.Height, 1151 LastTrustedHash: c.latestTrustedBlock.Hash(), 1152 LatestBlockTime: c.latestTrustedBlock.Time, 1153 TrustingPeriod: c.trustingPeriod.String(), 1154 // The caller of /status can deduce this from the two variables above 1155 // Having a boolean flag improves readbility 1156 TrustedBlockExpired: HeaderExpired(c.latestTrustedBlock.SignedHeader, c.trustingPeriod, time.Now()), 1157 } 1158 }