github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/stores.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "context" 15 "fmt" 16 "unsafe" 17 18 "github.com/cockroachdb/cockroach/pkg/clusterversion" 19 "github.com/cockroachdb/cockroach/pkg/gossip" 20 "github.com/cockroachdb/cockroach/pkg/keys" 21 "github.com/cockroachdb/cockroach/pkg/kv" 22 "github.com/cockroachdb/cockroach/pkg/roachpb" 23 "github.com/cockroachdb/cockroach/pkg/storage" 24 "github.com/cockroachdb/cockroach/pkg/util/hlc" 25 "github.com/cockroachdb/cockroach/pkg/util/log" 26 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 27 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 28 "github.com/cockroachdb/errors" 29 ) 30 31 // Stores provides methods to access a collection of stores. There's 32 // a visitor pattern and also an implementation of the client.Sender 33 // interface which directs a call to the appropriate store based on 34 // the call's key range. Stores also implements the gossip.Storage 35 // interface, which allows gossip bootstrap information to be 36 // persisted consistently to every store and the most recent bootstrap 37 // information to be read at node startup. 38 type Stores struct { 39 log.AmbientContext 40 clock *hlc.Clock 41 storeMap syncutil.IntMap // map[roachpb.StoreID]*Store 42 43 mu struct { 44 syncutil.Mutex 45 biLatestTS hlc.Timestamp // Timestamp of gossip bootstrap info 46 latestBI *gossip.BootstrapInfo // Latest cached bootstrap info 47 } 48 } 49 50 var _ kv.Sender = &Stores{} // Stores implements the client.Sender interface 51 var _ gossip.Storage = &Stores{} // Stores implements the gossip.Storage interface 52 53 // NewStores returns a local-only sender which directly accesses 54 // a collection of stores. 55 func NewStores(ambient log.AmbientContext, clock *hlc.Clock) *Stores { 56 return &Stores{ 57 AmbientContext: ambient, 58 clock: clock, 59 } 60 } 61 62 // IsMeta1Leaseholder returns whether the specified stores owns 63 // the meta1 lease. Returns an error if any. 64 func (ls *Stores) IsMeta1Leaseholder(now hlc.Timestamp) (bool, error) { 65 repl, _, err := ls.GetReplicaForRangeID(1) 66 if roachpb.IsRangeNotFoundError(err) { 67 return false, nil 68 } 69 if err != nil { 70 return false, err 71 } 72 return repl.OwnsValidLease(now), nil 73 } 74 75 // GetStoreCount returns the number of stores this node is exporting. 76 func (ls *Stores) GetStoreCount() int { 77 var count int 78 ls.storeMap.Range(func(_ int64, _ unsafe.Pointer) bool { 79 count++ 80 return true 81 }) 82 return count 83 } 84 85 // HasStore returns true if the specified store is owned by this Stores. 86 func (ls *Stores) HasStore(storeID roachpb.StoreID) bool { 87 _, ok := ls.storeMap.Load(int64(storeID)) 88 return ok 89 } 90 91 // GetStore looks up the store by store ID. Returns an error 92 // if not found. 93 func (ls *Stores) GetStore(storeID roachpb.StoreID) (*Store, error) { 94 if value, ok := ls.storeMap.Load(int64(storeID)); ok { 95 return (*Store)(value), nil 96 } 97 return nil, roachpb.NewStoreNotFoundError(storeID) 98 } 99 100 // AddStore adds the specified store to the store map. 101 func (ls *Stores) AddStore(s *Store) { 102 if _, loaded := ls.storeMap.LoadOrStore(int64(s.Ident.StoreID), unsafe.Pointer(s)); loaded { 103 panic(fmt.Sprintf("cannot add store twice: %+v", s.Ident)) 104 } 105 // If we've already read the gossip bootstrap info, ensure that 106 // all stores have the most recent values. 107 ls.mu.Lock() 108 defer ls.mu.Unlock() 109 if ls.mu.biLatestTS != (hlc.Timestamp{}) { 110 if err := ls.updateBootstrapInfoLocked(ls.mu.latestBI); err != nil { 111 ctx := ls.AnnotateCtx(context.TODO()) 112 log.Errorf(ctx, "failed to update bootstrap info on newly added store: %+v", err) 113 } 114 } 115 } 116 117 // RemoveStore removes the specified store from the store map. 118 func (ls *Stores) RemoveStore(s *Store) { 119 ls.storeMap.Delete(int64(s.Ident.StoreID)) 120 } 121 122 // VisitStores implements a visitor pattern over stores in the 123 // storeMap. The specified function is invoked with each store in 124 // turn. Care is taken to invoke the visitor func without the lock 125 // held to avoid inconsistent lock orderings, as some visitor 126 // functions may call back into the Stores object. Stores are visited 127 // in random order. 128 func (ls *Stores) VisitStores(visitor func(s *Store) error) error { 129 var err error 130 ls.storeMap.Range(func(k int64, v unsafe.Pointer) bool { 131 err = visitor((*Store)(v)) 132 return err == nil 133 }) 134 return err 135 } 136 137 // GetReplicaForRangeID returns the replica and store which contains the 138 // specified range. If the replica is not found on any store then 139 // roachpb.RangeNotFoundError will be returned. 140 func (ls *Stores) GetReplicaForRangeID( 141 rangeID roachpb.RangeID, 142 ) (replica *Replica, store *Store, err error) { 143 err = ls.VisitStores(func(s *Store) error { 144 r, err := s.GetReplica(rangeID) 145 if err == nil { 146 replica, store = r, s 147 return nil 148 } 149 if errors.HasType(err, (*roachpb.RangeNotFoundError)(nil)) { 150 return nil 151 } 152 return err 153 }) 154 if err != nil { 155 return nil, nil, err 156 } 157 if replica == nil { 158 return nil, nil, roachpb.NewRangeNotFoundError(rangeID, 0) 159 } 160 return replica, store, nil 161 } 162 163 // Send implements the client.Sender interface. The store is looked up from the 164 // store map using the ID specified in the request. 165 func (ls *Stores) Send( 166 ctx context.Context, ba roachpb.BatchRequest, 167 ) (*roachpb.BatchResponse, *roachpb.Error) { 168 if err := ba.ValidateForEvaluation(); err != nil { 169 log.Fatalf(ctx, "invalid batch (%s): %s", ba, err) 170 } 171 172 store, err := ls.GetStore(ba.Replica.StoreID) 173 if err != nil { 174 return nil, roachpb.NewError(err) 175 } 176 177 br, pErr := store.Send(ctx, ba) 178 if br != nil && br.Error != nil { 179 panic(roachpb.ErrorUnexpectedlySet(store, br)) 180 } 181 return br, pErr 182 } 183 184 // RangeFeed registers a rangefeed over the specified span. It sends updates to 185 // the provided stream and returns with an optional error when the rangefeed is 186 // complete. 187 func (ls *Stores) RangeFeed( 188 args *roachpb.RangeFeedRequest, stream roachpb.Internal_RangeFeedServer, 189 ) *roachpb.Error { 190 ctx := stream.Context() 191 if args.RangeID == 0 { 192 log.Fatal(ctx, "rangefeed request missing range ID") 193 } else if args.Replica.StoreID == 0 { 194 log.Fatal(ctx, "rangefeed request missing store ID") 195 } 196 197 store, err := ls.GetStore(args.Replica.StoreID) 198 if err != nil { 199 return roachpb.NewError(err) 200 } 201 202 return store.RangeFeed(args, stream) 203 } 204 205 // ReadBootstrapInfo implements the gossip.Storage interface. Read 206 // attempts to read gossip bootstrap info from every known store and 207 // finds the most recent from all stores to initialize the bootstrap 208 // info argument. Returns an error on any issues reading data for the 209 // stores (but excluding the case in which no data has been persisted 210 // yet). 211 func (ls *Stores) ReadBootstrapInfo(bi *gossip.BootstrapInfo) error { 212 var latestTS hlc.Timestamp 213 214 ctx := ls.AnnotateCtx(context.TODO()) 215 var err error 216 217 // Find the most recent bootstrap info. 218 ls.storeMap.Range(func(k int64, v unsafe.Pointer) bool { 219 s := (*Store)(v) 220 var storeBI gossip.BootstrapInfo 221 var ok bool 222 ok, err = storage.MVCCGetProto(ctx, s.engine, keys.StoreGossipKey(), hlc.Timestamp{}, &storeBI, 223 storage.MVCCGetOptions{}) 224 if err != nil { 225 return false 226 } 227 if ok && latestTS.Less(storeBI.Timestamp) { 228 latestTS = storeBI.Timestamp 229 *bi = storeBI 230 } 231 return true 232 }) 233 if err != nil { 234 return err 235 } 236 log.Infof(ctx, "read %d node addresses from persistent storage", len(bi.Addresses)) 237 238 ls.mu.Lock() 239 defer ls.mu.Unlock() 240 return ls.updateBootstrapInfoLocked(bi) 241 } 242 243 // WriteBootstrapInfo implements the gossip.Storage interface. Write 244 // persists the supplied bootstrap info to every known store. Returns 245 // nil on success; otherwise returns first error encountered writing 246 // to the stores. 247 func (ls *Stores) WriteBootstrapInfo(bi *gossip.BootstrapInfo) error { 248 ls.mu.Lock() 249 defer ls.mu.Unlock() 250 bi.Timestamp = ls.clock.Now() 251 if err := ls.updateBootstrapInfoLocked(bi); err != nil { 252 return err 253 } 254 ctx := ls.AnnotateCtx(context.TODO()) 255 log.Infof(ctx, "wrote %d node addresses to persistent storage", len(bi.Addresses)) 256 return nil 257 } 258 259 func (ls *Stores) updateBootstrapInfoLocked(bi *gossip.BootstrapInfo) error { 260 if bi.Timestamp.Less(ls.mu.biLatestTS) { 261 return nil 262 } 263 ctx := ls.AnnotateCtx(context.TODO()) 264 // Update the latest timestamp and set cached version. 265 ls.mu.biLatestTS = bi.Timestamp 266 ls.mu.latestBI = protoutil.Clone(bi).(*gossip.BootstrapInfo) 267 // Update all stores. 268 var err error 269 ls.storeMap.Range(func(k int64, v unsafe.Pointer) bool { 270 s := (*Store)(v) 271 err = storage.MVCCPutProto(ctx, s.engine, nil, keys.StoreGossipKey(), hlc.Timestamp{}, nil, bi) 272 return err == nil 273 }) 274 return err 275 } 276 277 // ReadVersionFromEngineOrZero reads the persisted cluster version from the 278 // engine, falling back to the zero value. 279 func ReadVersionFromEngineOrZero( 280 ctx context.Context, reader storage.Reader, 281 ) (clusterversion.ClusterVersion, error) { 282 var cv clusterversion.ClusterVersion 283 cv, err := ReadClusterVersion(ctx, reader) 284 if err != nil { 285 return clusterversion.ClusterVersion{}, err 286 } 287 return cv, nil 288 } 289 290 // WriteClusterVersionToEngines writes the given version to the given engines, 291 // Returns nil on success; otherwise returns first error encountered writing to 292 // the stores. 293 // 294 // WriteClusterVersion makes no attempt to validate the supplied version. 295 func WriteClusterVersionToEngines( 296 ctx context.Context, engines []storage.Engine, cv clusterversion.ClusterVersion, 297 ) error { 298 for _, eng := range engines { 299 if err := WriteClusterVersion(ctx, eng, cv); err != nil { 300 return errors.Wrapf(err, "error writing version to engine %s", eng) 301 } 302 } 303 return nil 304 } 305 306 // SynthesizeClusterVersionFromEngines returns the cluster version that was read 307 // from the engines or, if none are initialized, binaryMinSupportedVersion. 308 // Typically all initialized engines will have the same version persisted, 309 // though ill-timed crashes can result in situations where this is not the 310 // case. Then, the largest version seen is returned. 311 // 312 // binaryVersion is the version of this binary. An error is returned if 313 // any engine has a higher version, as this would indicate that this node 314 // has previously acked the higher cluster version but is now running an 315 // old binary, which is unsafe. 316 // 317 // binaryMinSupportedVersion is the minimum version supported by this binary. An 318 // error is returned if any engine has a version lower that this. 319 func SynthesizeClusterVersionFromEngines( 320 ctx context.Context, 321 engines []storage.Engine, 322 binaryVersion, binaryMinSupportedVersion roachpb.Version, 323 ) (clusterversion.ClusterVersion, error) { 324 // Find the most recent bootstrap info. 325 type originVersion struct { 326 roachpb.Version 327 origin string 328 } 329 330 maxPossibleVersion := roachpb.Version{Major: 999999} // Sort above any real version. 331 minStoreVersion := originVersion{ 332 Version: maxPossibleVersion, 333 origin: "(no store)", 334 } 335 336 // We run this twice because it's only after having seen all the versions 337 // that we can decide whether the node catches a version error. However, we 338 // also want to name at least one engine that violates the version 339 // constraints, which at the latest the second loop will achieve (because 340 // then minStoreVersion don't change any more). 341 for _, eng := range engines { 342 eng := eng.(storage.Reader) // we're read only 343 var cv clusterversion.ClusterVersion 344 cv, err := ReadVersionFromEngineOrZero(ctx, eng) 345 if err != nil { 346 return clusterversion.ClusterVersion{}, err 347 } 348 if cv.Version == (roachpb.Version{}) { 349 // This is needed when a node first joins an existing cluster, in 350 // which case it won't know what version to use until the first 351 // Gossip update comes in. 352 cv.Version = binaryMinSupportedVersion 353 } 354 355 // Avoid running a binary with a store that is too new. For example, 356 // restarting into 1.1 after having upgraded to 1.2 doesn't work. 357 if binaryVersion.Less(cv.Version) { 358 return clusterversion.ClusterVersion{}, errors.Errorf( 359 "cockroach version v%s is incompatible with data in store %s; use version v%s or later", 360 binaryVersion, eng, cv.Version) 361 } 362 363 // Track smallest use version encountered. 364 if cv.Version.Less(minStoreVersion.Version) { 365 minStoreVersion.Version = cv.Version 366 minStoreVersion.origin = fmt.Sprint(eng) 367 } 368 } 369 370 // If no use version was found, fall back to our binaryMinSupportedVersion. This 371 // is the case when a brand new node is joining an existing cluster (which 372 // may be on any older version this binary supports). 373 if minStoreVersion.Version == maxPossibleVersion { 374 minStoreVersion.Version = binaryMinSupportedVersion 375 } 376 377 cv := clusterversion.ClusterVersion{ 378 Version: minStoreVersion.Version, 379 } 380 log.Eventf(ctx, "read ClusterVersion %+v", cv) 381 382 // Avoid running a binary too new for this store. This is what you'd catch 383 // if, say, you restarted directly from 1.0 into 1.2 (bumping the min 384 // version) without going through 1.1 first. It would also be what you catch if 385 // you are starting 1.1 for the first time (after 1.0), but it crashes 386 // half-way through the startup sequence (so now some stores have 1.1, but 387 // some 1.0), in which case you are expected to run 1.1 again (hopefully 388 // without the crash this time) which would then rewrite all the stores. 389 // 390 // We only verify this now because as we iterate through the stores, we 391 // may not yet have picked up the final versions we're actually planning 392 // to use. 393 if minStoreVersion.Version.Less(binaryMinSupportedVersion) { 394 return clusterversion.ClusterVersion{}, errors.Errorf("store %s, last used with cockroach version v%s, "+ 395 "is too old for running version v%s (which requires data from v%s or later)", 396 minStoreVersion.origin, minStoreVersion.Version, binaryVersion, binaryMinSupportedVersion) 397 } 398 return cv, nil 399 } 400 401 func (ls *Stores) engines() []storage.Engine { 402 var engines []storage.Engine 403 ls.storeMap.Range(func(_ int64, v unsafe.Pointer) bool { 404 engines = append(engines, (*Store)(v).Engine()) 405 return true // want more 406 }) 407 return engines 408 }