github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/stores_server.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    19  	"github.com/cockroachdb/cockroach/pkg/storage"
    20  	"github.com/cockroachdb/cockroach/pkg/util/log"
    21  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    22  )
    23  
// Server implements PerReplicaServer.
//
// Its methods resolve the store named in a request's StoreRequestHeader
// through the wrapped Stores collection and then run the per-replica
// operation against that store.
type Server struct {
	// stores is the collection used to look up a store by its StoreID.
	stores *Stores
}
    28  
// Compile-time assertion that Server (by value) satisfies PerReplicaServer.
var _ PerReplicaServer = Server{}
    30  
    31  // MakeServer returns a new instance of Server.
    32  func MakeServer(descriptor *roachpb.NodeDescriptor, stores *Stores) Server {
    33  	return Server{stores}
    34  }
    35  
    36  func (is Server) execStoreCommand(h StoreRequestHeader, f func(*Store) error) error {
    37  	store, err := is.stores.GetStore(h.StoreID)
    38  	if err != nil {
    39  		return err
    40  	}
    41  	return f(store)
    42  }
    43  
    44  // CollectChecksum implements PerReplicaServer.
    45  func (is Server) CollectChecksum(
    46  	ctx context.Context, req *CollectChecksumRequest,
    47  ) (*CollectChecksumResponse, error) {
    48  	resp := &CollectChecksumResponse{}
    49  	err := is.execStoreCommand(req.StoreRequestHeader,
    50  		func(s *Store) error {
    51  			r, err := s.GetReplica(req.RangeID)
    52  			if err != nil {
    53  				return err
    54  			}
    55  			c, err := r.getChecksum(ctx, req.ChecksumID)
    56  			if err != nil {
    57  				return err
    58  			}
    59  			ccr := c.CollectChecksumResponse
    60  			if !bytes.Equal(req.Checksum, ccr.Checksum) {
    61  				// If this check is false, then this request is the replica carrying out
    62  				// the consistency check. The message is spurious, but we want to leave the
    63  				// snapshot (if present) intact.
    64  				if len(req.Checksum) > 0 {
    65  					log.Errorf(ctx, "consistency check failed on range r%d: expected checksum %x, got %x",
    66  						req.RangeID, req.Checksum, ccr.Checksum)
    67  					// Leave resp.Snapshot alone so that the caller will receive what's
    68  					// in it (if anything).
    69  				}
    70  			} else {
    71  				ccr.Snapshot = nil
    72  			}
    73  			resp = &ccr
    74  			return nil
    75  		})
    76  	return resp, err
    77  }
    78  
    79  // WaitForApplication implements PerReplicaServer.
    80  //
    81  // It is the caller's responsibility to cancel or set a timeout on the context.
    82  // If the context is never canceled, WaitForApplication will retry forever.
    83  func (is Server) WaitForApplication(
    84  	ctx context.Context, req *WaitForApplicationRequest,
    85  ) (*WaitForApplicationResponse, error) {
    86  	resp := &WaitForApplicationResponse{}
    87  	err := is.execStoreCommand(req.StoreRequestHeader, func(s *Store) error {
    88  		// TODO(benesch): Once Replica changefeeds land, see if we can implement
    89  		// this request handler without polling.
    90  		retryOpts := retry.Options{InitialBackoff: 10 * time.Millisecond}
    91  		for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
    92  			// Long-lived references to replicas are frowned upon, so re-fetch the
    93  			// replica on every turn of the loop.
    94  			repl, err := s.GetReplica(req.RangeID)
    95  			if err != nil {
    96  				return err
    97  			}
    98  			repl.mu.RLock()
    99  			leaseAppliedIndex := repl.mu.state.LeaseAppliedIndex
   100  			repl.mu.RUnlock()
   101  			if leaseAppliedIndex >= req.LeaseIndex {
   102  				// For performance reasons, we don't sync to disk when
   103  				// applying raft commands. This means that if a node restarts
   104  				// after applying but before the next sync, its
   105  				// LeaseAppliedIndex could temporarily regress (until it
   106  				// reapplies its latest raft log entries).
   107  				//
   108  				// Merging relies on the monotonicity of the log applied
   109  				// index, so before returning ensure that rocksdb has synced
   110  				// everything up to this point to disk.
   111  				//
   112  				// https://github.com/cockroachdb/cockroach/issues/33120
   113  				return storage.WriteSyncNoop(ctx, s.engine)
   114  			}
   115  		}
   116  		if ctx.Err() == nil {
   117  			log.Fatal(ctx, "infinite retry loop exited but context has no error")
   118  		}
   119  		return ctx.Err()
   120  	})
   121  	return resp, err
   122  }
   123  
   124  // WaitForReplicaInit implements PerReplicaServer.
   125  //
   126  // It is the caller's responsibility to cancel or set a timeout on the context.
   127  // If the context is never canceled, WaitForReplicaInit will retry forever.
   128  func (is Server) WaitForReplicaInit(
   129  	ctx context.Context, req *WaitForReplicaInitRequest,
   130  ) (*WaitForReplicaInitResponse, error) {
   131  	resp := &WaitForReplicaInitResponse{}
   132  	err := is.execStoreCommand(req.StoreRequestHeader, func(s *Store) error {
   133  		retryOpts := retry.Options{InitialBackoff: 10 * time.Millisecond}
   134  		for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
   135  			// Long-lived references to replicas are frowned upon, so re-fetch the
   136  			// replica on every turn of the loop.
   137  			if repl, err := s.GetReplica(req.RangeID); err == nil && repl.IsInitialized() {
   138  				return nil
   139  			}
   140  		}
   141  		if ctx.Err() == nil {
   142  			log.Fatal(ctx, "infinite retry loop exited but context has no error")
   143  		}
   144  		return ctx.Err()
   145  	})
   146  	return resp, err
   147  }