github.com/weaviate/weaviate@v1.24.6/usecases/scaler/scaler.go

//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package scaler

import (
	"context"
	"fmt"
	"runtime"

	enterrors "github.com/weaviate/weaviate/entities/errors"

	"github.com/google/uuid"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"github.com/weaviate/weaviate/entities/backup"
	"github.com/weaviate/weaviate/usecases/sharding"
)

// TODOs: Performance
//
// 1. Improve the performance of syncing a shard to multiple nodes (see rsync.Push).
// We could concurrently sync the same files to different nodes while avoiding overlap.
//
// 2. To fail fast, we might consider creating all shards at once and re-initializing them in the final step.
//
// 3. Implement scaler.scaleIn.

var (
	// ErrUnresolvedName is returned when the host address of a node cannot be resolved.
	ErrUnresolvedName = errors.New("cannot resolve node name")
	_NUMCPU           = runtime.NumCPU()
)

// Scaler scales class replicas out/in.
//
// It scales out a class by replicating its shards on new replicas.
type Scaler struct {
	schema          SchemaManager
	cluster         cluster
	source          BackUpper // data source
	client          client    // client for remote nodes
	logger          logrus.FieldLogger
	persistenceRoot string
}

// New returns a new instance of Scaler.
func New(cl cluster, source BackUpper,
	c client, logger logrus.FieldLogger, persistenceRoot string,
) *Scaler {
	return &Scaler{
		cluster:         cl,
		source:          source,
		client:          c,
		logger:          logger,
		persistenceRoot: persistenceRoot,
	}
}

// BackUpper is used to back up shards of a specific class.
type BackUpper interface {
	// ShardsBackup returns a class backup descriptor for the given list of shards.
	ShardsBackup(_ context.Context, id, class string, shards []string) (backup.ClassDescriptor, error)
	// ReleaseBackup releases the backup specified by its id.
	ReleaseBackup(ctx context.Context, id, className string) error
}

// cluster is used by the scaler to query the cluster.
type cluster interface {
	// Candidates returns the list of existing nodes in the cluster.
	Candidates() []string
	// LocalName returns the name of this node.
	LocalName() string
	// NodeHostname returns the host address for a specific node name.
	NodeHostname(name string) (string, bool)
}

// SchemaManager is used by the scaler to get and update sharding states.
type SchemaManager interface {
	CopyShardingState(class string) *sharding.State
}

func (s *Scaler) SetSchemaManager(sm SchemaManager) {
	s.schema = sm
}

// Scale increases or decreases the number of class replicas.
//
// It returns the updated sharding state if successful. The caller must then
// make sure to broadcast that state to all nodes as part of the "update"
// transaction.
func (s *Scaler) Scale(ctx context.Context, className string,
	updated sharding.Config, prevReplFactor, newReplFactor int64,
) (*sharding.State, error) {
	// First identify what the sharding state was before this change. This is
	// mainly to be able to compare the diff later, so we know where we need to
	// make changes.
	ssBefore := s.schema.CopyShardingState(className)
	if ssBefore == nil {
		return nil, fmt.Errorf("no sharding state for class %q", className)
	}
	if newReplFactor > prevReplFactor {
		return s.scaleOut(ctx, className, ssBefore, updated, newReplFactor)
	}

	if newReplFactor < prevReplFactor {
		return s.scaleIn(ctx, className, updated)
	}

	return nil, nil
}
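
// The sketch below is illustrative only and not part of the original file. It
// shows, under assumptions, how a caller could drive Scale: the returned
// sharding state must be broadcast to all nodes as part of the schema "update"
// transaction mentioned above. The broadcast callback is hypothetical.
func applyScale(ctx context.Context, s *Scaler, className string,
	cfg sharding.Config, prevReplFactor, newReplFactor int64,
	broadcast func(*sharding.State) error, // hypothetical broadcast step
) error {
	ss, err := s.Scale(ctx, className, cfg, prevReplFactor, newReplFactor)
	if err != nil {
		return fmt.Errorf("scale class %q: %w", className, err)
	}
	if ss == nil {
		// The replication factor did not change; there is nothing to broadcast.
		return nil
	}
	return broadcast(ss)
}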

// scaleOut replicates class shards on new replicas (nodes):
//
// * It calculates the new sharding state
// * It pushes locally existing shards to new replicas
// * It delegates replication of remote shards to their owner nodes
func (s *Scaler) scaleOut(ctx context.Context, className string, ssBefore *sharding.State,
	updated sharding.Config, replFactor int64,
) (*sharding.State, error) {
	// Create a deep copy of the old sharding state, so we can start building the
	// updated state. Because this is a deep copy we don't risk leaking our
	// changes to anyone else. We can return the changes in the end where the
	// caller can then make sure to broadcast the new state to the cluster.
	ssAfter := ssBefore.DeepCopy()
	ssAfter.Config = updated

	// Identify all shards of the class and adjust the replicas. After this is
	// done, the affected shards belong to more nodes than they did before.
	for name, shard := range ssAfter.Physical {
		if err := shard.AdjustReplicas(int(replFactor), s.cluster); err != nil {
			return nil, err
		}
		ssAfter.Physical[name] = shard
	}
	lDist, nodeDist := distributions(ssBefore, &ssAfter)
	g, ctx := enterrors.NewErrorGroupWithContextWrapper(s.logger, ctx)
	// resolve hosts beforehand
	nodes := nodeDist.nodes()
	hosts, err := hosts(nodes, s.cluster)
	if err != nil {
		return nil, err
	}
	for i, node := range nodes {
		dist := nodeDist[node]
		i := i
		g.Go(func() error {
			err := s.client.IncreaseReplicationFactor(ctx, hosts[i], className, dist)
			if err != nil {
				return fmt.Errorf("increase replication factor for class %q on node %q: %w", className, nodes[i], err)
			}
			return nil
		})
	}

	g.Go(func() error {
		if err := s.LocalScaleOut(ctx, className, lDist); err != nil {
			return fmt.Errorf("increase local replication factor: %w", err)
		}
		return nil
	})
	if err := g.Wait(); err != nil {
		return nil, err
	}

	// Finally, return the sharding state back to the schema manager. The schema
	// manager will then broadcast this updated state to the cluster. This is
	// essentially what will take the new replication shards live: on the new
	// nodes, if traffic is incoming, IsShardLocal() would have returned false
	// before. But now that a copy of the local shard is present it will return
	// true and serve the traffic.
	return &ssAfter, nil
}
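
// The helper below is a hedged sketch and not part of the original file. It
// isolates the fan-out pattern scaleOut uses above: one goroutine per remote
// node via the wrapped error group, so the first failing call aborts the whole
// operation. The call parameter stands in for a remote call such as
// client.IncreaseReplicationFactor; hosts[i] must be the resolved address of
// nodes[i].
func fanOutToNodes(ctx context.Context, logger logrus.FieldLogger,
	nodes, hosts []string, call func(ctx context.Context, host string) error,
) error {
	g, ctx := enterrors.NewErrorGroupWithContextWrapper(logger, ctx)
	for i := range nodes {
		i := i // capture the loop variable for the goroutine below
		g.Go(func() error {
			if err := call(ctx, hosts[i]); err != nil {
				return fmt.Errorf("node %q: %w", nodes[i], err)
			}
			return nil
		})
	}
	return g.Wait()
}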

// LocalScaleOut syncs local shards with the new replicas.
//
// This is the meat & bones of this implementation.
// For each shard, we're roughly doing the following:
//   - Create a backup of the shards, so they are safe to copy
//   - Figure out the copy targets (i.e. each node that is part of the after
//     state, but wasn't part of the before state yet)
//   - Create an empty shard on the target node
//   - Copy over all files from the backup
//   - ReInit the shard to recognize the copied files
//   - Release the single-shard backup
func (s *Scaler) LocalScaleOut(ctx context.Context,
	className string, dist ShardDist,
) error {
	if len(dist) < 1 {
		return nil
	}
	// Create a backup of the affected shards, so they are safe to copy.
	bakID := fmt.Sprintf("_internal_scaler_%s", uuid.New().String()) // TODO: better name
	bak, err := s.source.ShardsBackup(ctx, bakID, className, dist.shards())
	if err != nil {
		return fmt.Errorf("create snapshot: %w", err)
	}

	defer func() {
		err := s.source.ReleaseBackup(context.Background(), bakID, className)
		if err != nil {
			s.logger.WithField("scaler", "releaseBackup").WithField("class", className).Error(err)
		}
	}()
	rsync := newRSync(s.client, s.cluster, s.persistenceRoot)
	return rsync.Push(ctx, bak.Shards, dist, className, s.logger)
}

func (s *Scaler) scaleIn(ctx context.Context, className string,
	updated sharding.Config,
) (*sharding.State, error) {
	return nil, errors.Errorf("scaling in not supported yet")
}
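
// The handler below is a hypothetical sketch and not part of the original
// file. It illustrates the delegation described in scaleOut's doc comment: a
// node that receives an IncreaseReplicationFactor request for shards it owns
// would hand the class name and shard distribution to LocalScaleOut, which
// backs up the shards, pushes the files to the new replicas, and finally
// releases the backup.
func handleIncreaseReplicationFactor(ctx context.Context, s *Scaler,
	className string, dist ShardDist,
) error {
	return s.LocalScaleOut(ctx, className, dist)
}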