github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/tools/deploy/planner.go

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package deploy

import (
	"fmt"
	"sort"
	"sync"

	"github.com/m3db/m3/src/cluster/services"
	"github.com/m3db/m3/src/x/errors"
	xsync "github.com/m3db/m3/src/x/sync"
)

var (
	emptyPlan deploymentPlan
	emptyStep deploymentStep
)

// planner generates deployment plans for given instances under constraints.
type planner interface {
	// GeneratePlan generates a deployment plan for given target instances.
	GeneratePlan(toDeploy, all instanceMetadatas) (deploymentPlan, error)

	// GenerateOneStep generates one deployment step for given target instances.
	GenerateOneStep(toDeploy, all instanceMetadatas) (deploymentStep, error)
}

type deploymentPlanner struct {
	leaderService    services.LeaderService
	workers          xsync.WorkerPool
	electionKeyFmt   string
	maxStepSize      int
	validatorFactory validatorFactory
}
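// Illustrative usage sketch of the planner (the surrounding deploy helper is
// assumed to perform the actual deployment and validation of each step):
//
//	p := newPlanner(client, opts)
//	plan, err := p.GeneratePlan(toDeploy, all)
//	if err != nil {
//		return err
//	}
//	for _, step := range plan.Steps {
//		// Deploy to step.Targets, then run each target's Validator
//		// before moving on to the next step.
//	}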
// newPlanner creates a new deployment planner.
func newPlanner(client AggregatorClient, opts PlannerOptions) planner {
	workers := opts.WorkerPool()
	validatorFactory := newValidatorFactory(client, workers)
	return deploymentPlanner{
		leaderService:    opts.LeaderService(),
		workers:          opts.WorkerPool(),
		electionKeyFmt:   opts.ElectionKeyFmt(),
		maxStepSize:      opts.MaxStepSize(),
		validatorFactory: validatorFactory,
	}
}

func (p deploymentPlanner) GeneratePlan(
	toDeploy, all instanceMetadatas,
) (deploymentPlan, error) {
	grouped, err := p.groupInstancesByShardSetID(toDeploy, all)
	if err != nil {
		return emptyPlan, fmt.Errorf("unable to group instances by shard set id: %v", err)
	}
	return p.generatePlan(grouped, len(toDeploy), p.maxStepSize), nil
}

func (p deploymentPlanner) GenerateOneStep(
	toDeploy, all instanceMetadatas,
) (deploymentStep, error) {
	grouped, err := p.groupInstancesByShardSetID(toDeploy, all)
	if err != nil {
		return emptyStep, fmt.Errorf("unable to group instances by shard set id: %v", err)
	}
	return p.generateStep(grouped, p.maxStepSize), nil
}

// generatePlan repeatedly generates deployment steps until every target
// instance has been assigned to a step.
func (p deploymentPlanner) generatePlan(
	instances map[uint32]*instanceGroup,
	numInstances int,
	maxStepSize int,
) deploymentPlan {
	var (
		step  deploymentStep
		plan  deploymentPlan
		total = numInstances
	)
	for total > 0 {
		step = p.generateStep(instances, maxStepSize)
		plan.Steps = append(plan.Steps, step)
		total -= len(step.Targets)
	}
	return plan
}

// generateStep generates a single deployment step, preferring follower
// instances over leader instances.
func (p deploymentPlanner) generateStep(
	instances map[uint32]*instanceGroup,
	maxStepSize int,
) deploymentStep {
	// NB(xichen): we always choose instances that are currently in the follower state first,
	// unless there are no more follower instances, in which case we deploy the leader instances.
	// This reduces the overall deployment time: fewer leader promotions are needed, so we are
	// less likely to have to wait for follower instances to become ready to take over the
	// leader role.
	step := p.generateStepFromTargetType(instances, maxStepSize, followerTarget)

	// If we have found some follower instances to deploy, we don't attempt to deploy leader
	// instances in the same step, even if we have not reached the max step size, so that
	// deploying leaders does not delay deploying to the followers.
	if len(step.Targets) > 0 {
		return step
	}

	// If we have not found any followers, we proceed to deploy leader instances.
	return p.generateStepFromTargetType(instances, maxStepSize, leaderTarget)
}
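// To make the follower-first policy in generateStep concrete, consider a
// single shard set group (instance ids below are hypothetical):
//
//	LeaderID: "agg01"
//	ToDeploy: ["agg01", "agg02", "agg03"]
//
// Successive steps first target "agg02" and then "agg03" (at most one instance
// per group per step), and only once no followers are left to deploy does a
// step target the leader "agg01".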
// generateStepFromTargetType generates a deployment step containing only
// instances of the given target type (followers or leaders), selecting at
// most one instance per shard set group.
func (p deploymentPlanner) generateStepFromTargetType(
	instances map[uint32]*instanceGroup,
	maxStepSize int,
	targetType targetType,
) deploymentStep {
	step := deploymentStep{Targets: make([]deploymentTarget, 0, maxStepSize)}
	for shardSetID, group := range instances {
		if len(group.ToDeploy) == 0 {
			delete(instances, shardSetID)
			continue
		}

		done := false
		for i, instance := range group.ToDeploy {
			if !matchTargetType(instance.PlacementInstanceID, group.LeaderID, targetType) {
				continue
			}
			target := deploymentTarget{
				Instance:  instance,
				Validator: p.validatorFactory.ValidatorFor(instance, group, targetType),
			}
			step.Targets = append(step.Targets, target)
			group.removeInstanceToDeploy(i)
			if maxStepSize != 0 && len(step.Targets) >= maxStepSize {
				done = true
			}
			break
		}
		if done {
			break
		}
	}

	// Sort targets by instance id for deterministic ordering.
	sort.Sort(targetsByInstanceIDAsc(step.Targets))
	return step
}

// groupInstancesByShardSetID groups the instances to be deployed by their
// shard set id and determines the leader of each group.
func (p deploymentPlanner) groupInstancesByShardSetID(
	toDeploy, all instanceMetadatas,
) (map[uint32]*instanceGroup, error) {
	grouped := make(map[uint32]*instanceGroup, len(toDeploy))

	// Group the instances to be deployed by shard set id.
	for _, instance := range toDeploy {
		shardSetID := instance.ShardSetID
		group, exists := grouped[shardSetID]
		if !exists {
			group = &instanceGroup{
				ToDeploy: make(instanceMetadatas, 0, 2),
				All:      make(instanceMetadatas, 0, 2),
			}
		}
		group.ToDeploy = append(group.ToDeploy, instance)
		grouped[shardSetID] = group
	}

	// Determine the full set of instances in each group.
	for _, instance := range all {
		shardSetID := instance.ShardSetID
		group, exists := grouped[shardSetID]
		if !exists {
			continue
		}
		group.All = append(group.All, instance)
	}

	// Determine the leader of each group.
	var (
		wg    sync.WaitGroup
		errCh = make(chan error, len(grouped))
	)
	for shardSetID, group := range grouped {
		shardSetID, group := shardSetID, group
		wg.Add(1)
		p.workers.Go(func() {
			defer wg.Done()

			electionKey := fmt.Sprintf(p.electionKeyFmt, shardSetID)
			leader, err := p.leaderService.Leader(electionKey)
			if err != nil {
				err = fmt.Errorf("unable to determine leader for shard set id %d: %v", shardSetID, err)
				errCh <- err
				return
			}
			for _, instance := range group.All {
				if instance.PlacementInstanceID == leader {
					group.LeaderID = instance.PlacementInstanceID
					return
				}
			}
			err = fmt.Errorf("unknown leader %s for shard set id %d", leader, shardSetID)
			errCh <- err
		})
	}

	wg.Wait()
	close(errCh)
	multiErr := errors.NewMultiError()
	for err := range errCh {
		multiErr = multiErr.Add(err)
	}
	if err := multiErr.FinalError(); err != nil {
		return nil, err
	}
	return grouped, nil
}
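// For illustration, if the configured election key format were
// "/shardset/%d/election" (the real format comes from PlannerOptions and may
// differ), the leader of shard set 3 would be looked up as:
//
//	electionKey := fmt.Sprintf("/shardset/%d/election", 3) // "/shardset/3/election"
//	leader, err := p.leaderService.Leader(electionKey)
//
// The returned leader id is then matched against the placement instance ids in
// the group; a leader that does not appear in the group is reported as an error.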
// deploymentTarget is a deployment target.
type deploymentTarget struct {
	Instance  instanceMetadata
	Validator validator
}

func (t deploymentTarget) String() string { return t.Instance.PlacementInstanceID }

// deploymentTargets is a list of deployment targets.
type deploymentTargets []deploymentTarget

// DeploymentInstanceIDs returns the deployment instance ids of the targets.
func (targets deploymentTargets) DeploymentInstanceIDs() []string {
	deploymentInstanceIDs := make([]string, 0, len(targets))
	for _, target := range targets {
		deploymentInstanceIDs = append(deploymentInstanceIDs, target.Instance.DeploymentInstanceID)
	}
	return deploymentInstanceIDs
}

// targetsByInstanceIDAsc sorts deployment targets by placement instance id in ascending order.
type targetsByInstanceIDAsc []deploymentTarget

func (a targetsByInstanceIDAsc) Len() int      { return len(a) }
func (a targetsByInstanceIDAsc) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

func (a targetsByInstanceIDAsc) Less(i, j int) bool {
	return a[i].Instance.PlacementInstanceID < a[j].Instance.PlacementInstanceID
}

// deploymentStep is a deployment step.
type deploymentStep struct {
	Targets deploymentTargets
}

// deploymentPlan is a deployment plan.
type deploymentPlan struct {
	Steps []deploymentStep
}

type targetType int

const (
	followerTarget targetType = iota
	leaderTarget
)

// matchTargetType determines whether an instance matches the given target type
// based on whether it is the leader of its group.
func matchTargetType(
	instanceID string,
	leaderID string,
	targetType targetType,
) bool {
	if targetType == leaderTarget {
		return instanceID == leaderID
	}
	return instanceID != leaderID
}

type instanceGroup struct {
	// LeaderID is the instance id of the leader in the group.
	LeaderID string

	// ToDeploy are the instances to be deployed in the group.
	ToDeploy instanceMetadatas

	// All includes all the instances in the group regardless of whether they need to be deployed.
	All instanceMetadatas
}

// removeInstanceToDeploy removes the i-th instance from ToDeploy by swapping
// it with the last instance and truncating the slice.
func (group *instanceGroup) removeInstanceToDeploy(i int) {
	lastIdx := len(group.ToDeploy) - 1
	group.ToDeploy[i], group.ToDeploy[lastIdx] = group.ToDeploy[lastIdx], group.ToDeploy[i]
	group.ToDeploy = group.ToDeploy[:lastIdx]
}
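// Note that removeInstanceToDeploy uses a swap-with-last removal and therefore
// does not preserve the order of ToDeploy, e.g. (hypothetical ids):
//
//	ToDeploy: ["agg01", "agg02", "agg03"]
//	group.removeInstanceToDeploy(0)
//	ToDeploy: ["agg03", "agg02"]
//
// Deterministic ordering of a step's targets is instead provided by the sort
// in generateStepFromTargetType.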