go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/quota/manager.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package quota 16 17 import ( 18 "context" 19 "crypto/md5" 20 "encoding/hex" 21 "fmt" 22 "strings" 23 "sync" 24 "time" 25 26 "google.golang.org/protobuf/types/known/durationpb" 27 28 "go.chromium.org/luci/common/clock" 29 "go.chromium.org/luci/common/errors" 30 "go.chromium.org/luci/common/retry" 31 "go.chromium.org/luci/common/retry/transient" 32 "go.chromium.org/luci/server/auth" 33 srvquota "go.chromium.org/luci/server/quota" 34 "go.chromium.org/luci/server/quota/quotapb" 35 36 cfgpb "go.chromium.org/luci/cv/api/config/v2" 37 "go.chromium.org/luci/cv/internal/common" 38 "go.chromium.org/luci/cv/internal/configs/prjcfg" 39 "go.chromium.org/luci/cv/internal/metrics" 40 "go.chromium.org/luci/cv/internal/run" 41 ) 42 43 var qinit sync.Once 44 var qapp SrvQuota 45 46 const ( 47 // appID to register with quota module. 48 appID = "cv" 49 50 // Resource types that the quota can use. 51 runResource = "runs" 52 tryjobResource = "tryjobs" 53 54 defaultUserLimit = "default" 55 56 // Default lifetime of a quota account. 57 accountLifeTime = 3 * 24 * time.Hour // 3 days 58 ) 59 60 // Manager manages the quota accounts for CV users. 61 type Manager struct { 62 qapp SrvQuota 63 } 64 65 // SrvQuota manages quota 66 type SrvQuota interface { 67 LoadPoliciesManual(ctx context.Context, realm string, version string, cfg *quotapb.PolicyConfig) (*quotapb.PolicyConfigID, error) 68 AccountID(realm, namespace, name, resourceType string) *quotapb.AccountID 69 } 70 71 // DebitRunQuota debits the run quota from a given user's account. 72 func (qm *Manager) DebitRunQuota(ctx context.Context, r *run.Run) (*quotapb.OpResult, *cfgpb.UserLimit, error) { 73 return qm.runQuotaOp(ctx, r, "debit", -1) 74 } 75 76 // CreditRunQuota credits the run quota into a given user's account. 77 func (qm *Manager) CreditRunQuota(ctx context.Context, r *run.Run) (*quotapb.OpResult, *cfgpb.UserLimit, error) { 78 return qm.runQuotaOp(ctx, r, "credit", 1) 79 } 80 81 // DebitTryjobQuota debits the tryjob quota from a given user's account. 82 func (qm *Manager) DebitTryjobQuota(ctx context.Context) (*quotapb.OpResult, error) { 83 return nil, nil 84 } 85 86 // CreditTryjobQuota credits the tryjob quota into a given user's account. 87 func (qm *Manager) CreditTryjobQuota(ctx context.Context) (*quotapb.OpResult, error) { 88 return nil, nil 89 } 90 91 // RunQuotaAccountID returns the account id of the run quota for the given run. 92 func (qm *Manager) RunQuotaAccountID(r *run.Run) *quotapb.AccountID { 93 // The email is hashed using md5 for the account id to avoid PII. 94 emailHash := md5.Sum([]byte(r.BilledTo.Email())) 95 return qm.qapp.AccountID(r.ID.LUCIProject(), r.ConfigGroupID.Name(), hex.EncodeToString(emailHash[:]), runResource) 96 } 97 98 // runQuotaOp updates the run quota for the given run state by the given delta. 99 func (qm *Manager) runQuotaOp(ctx context.Context, r *run.Run, opID string, delta int64) (*quotapb.OpResult, *cfgpb.UserLimit, error) { 100 userLimit, err := findRunLimit(ctx, r) 101 102 // userLimit == nil when no run user limit is configured for this user. 103 if err != nil || userLimit == nil { 104 return nil, nil, err 105 } 106 107 policyID := "apb.PolicyID{ 108 Config: policyConfigID(r.ID.LUCIProject(), r.ConfigGroupID.Hash()), 109 Key: runPolicyKey(r.ConfigGroupID.Name(), userLimit.GetName()), 110 } 111 112 // When policy is set to unlimited, the op is applied with IGNORE_POLICY_BOUNDS. 113 options := quotapb.Op_WITH_POLICY_LIMIT_DELTA 114 if userLimit.GetRun().GetMaxActive().GetUnlimited() { 115 options |= quotapb.Op_IGNORE_POLICY_BOUNDS 116 } 117 118 quotaOp := []*quotapb.Op{ 119 { 120 AccountId: qm.RunQuotaAccountID(r), 121 PolicyId: policyID, 122 RelativeTo: quotapb.Op_CURRENT_BALANCE, 123 Delta: delta, 124 Options: uint32(options), 125 }, 126 } 127 128 // When server/quota does not have the policyId already, rewrite the policy and retry the op. 129 var opResponse *quotapb.ApplyOpsResponse 130 err = retry.Retry(clock.Tag(ctx, common.LaunchRetryClockTag), makeRetryFactory(), func() (err error) { 131 opResponse, err = srvquota.ApplyOps(ctx, requestID(r.ID, opID), durationpb.New(accountLifeTime), quotaOp) 132 if errors.Unwrap(err) == srvquota.ErrQuotaApply && opResponse.Results[0].Status == quotapb.OpResult_ERR_UNKNOWN_POLICY { 133 if _, err := qm.WritePolicy(ctx, r.ID.LUCIProject()); err != nil { 134 return err 135 } 136 137 return errors.Annotate(err, "ApplyOps: ERR_UNKNOWN_POLICY").Tag(transient.Tag).Err() 138 } 139 140 return 141 }, nil) 142 143 if err == nil || errors.Unwrap(err) == srvquota.ErrQuotaApply { 144 metrics.Internal.QuotaOp.Add( 145 ctx, 146 1, 147 r.ID.LUCIProject(), 148 r.ConfigGroupID.Name(), 149 policyID.GetKey().GetName(), 150 runResource, 151 opID, 152 opResponse.Results[0].Status.String(), 153 ) 154 155 // On ErrQuotaApply, OpResult.Status stores the reason for failure. 156 return opResponse.Results[0], userLimit, err 157 } 158 159 metrics.Internal.QuotaOp.Add( 160 ctx, 161 1, 162 r.ID.LUCIProject(), 163 r.ConfigGroupID.Name(), 164 policyID.GetKey().GetName(), 165 runResource, 166 opID, 167 "UNKNOWN_ERROR", 168 ) 169 170 return nil, userLimit, err 171 } 172 173 func makeRetryFactory() retry.Factory { 174 return transient.Only(func() retry.Iterator { 175 return &retry.ExponentialBackoff{ 176 Limited: retry.Limited{ 177 Delay: 100 * time.Millisecond, 178 Retries: 3, 179 }, 180 Multiplier: 2, 181 } 182 }) 183 } 184 185 // requestID constructs the idempotent requestID for the quota operation. 186 func requestID(runID common.RunID, op string) string { 187 return string(runID) + "/" + op 188 } 189 190 // findRunLimit identifies the UserLimit to use for the given run. 191 func findRunLimit(ctx context.Context, r *run.Run) (*cfgpb.UserLimit, error) { 192 project := r.ID.LUCIProject() 193 cfgGroup, err := prjcfg.GetConfigGroup(ctx, project, r.ConfigGroupID) 194 if err != nil { 195 return nil, err 196 } 197 198 config := cfgGroup.Content 199 if config == nil { 200 return nil, fmt.Errorf("cannot find cfgGroup content") 201 } 202 203 user := r.BilledTo 204 for _, userLimit := range config.GetUserLimits() { 205 if userLimit.GetRun() == nil { 206 continue 207 } 208 209 var groups []string 210 for _, principal := range userLimit.GetPrincipals() { 211 switch parts := strings.SplitN(principal, ":", 2); { 212 case len(parts) != 2: 213 // Each entry can be either an identity string "user:<email>" or a LUCI group reference "group:<name>". 214 return nil, fmt.Errorf("improper format for principal: %s", principal) 215 case parts[0] == "user" && parts[1] == user.Email(): 216 return userLimit, nil 217 case parts[0] == "group": 218 groups = append(groups, parts[1]) 219 } 220 } 221 222 if len(groups) == 0 { 223 continue 224 } 225 226 switch result, err := auth.GetState(ctx).DB().IsMember(ctx, user, groups); { 227 case err != nil: 228 return nil, err 229 case result: 230 return userLimit, nil 231 } 232 } 233 234 // Check default run limit if user is not a part of any defined user limit groups. 235 if config.GetUserLimitDefault().GetRun() != nil { 236 userLimit := config.GetUserLimitDefault() 237 238 // Override default userLimit name to `defaultUserLimit`. The name field 239 // is assumed to be `optional` and hence standardized to 240 // `defaultUserLimit`. This field is used by policyId. 241 userLimit.Name = defaultUserLimit 242 return userLimit, nil 243 } 244 245 // No limits configured for this user. 246 return nil, nil 247 } 248 249 // policyConfigID is a helper to generate quota policyConfigID. 250 func policyConfigID(realm, version string) *quotapb.PolicyConfigID { 251 return "apb.PolicyConfigID{ 252 AppId: appID, 253 Realm: realm, 254 Version: version, 255 } 256 } 257 258 // runPolicyKey is a helper to generate run quota policy key. 259 func runPolicyKey(configName, name string) *quotapb.PolicyKey { 260 return "apb.PolicyKey{ 261 Namespace: configName, 262 Name: name, 263 ResourceType: runResource, 264 } 265 } 266 267 // runPolicyEntry is a helper to generate a run quota policy entry. 268 func runPolicyEntry(polkey *quotapb.PolicyKey, limit uint64) *quotapb.PolicyConfig_Entry { 269 return "apb.PolicyConfig_Entry{ 270 Key: polkey, 271 Policy: "apb.Policy{ 272 Default: limit, 273 Limit: limit, 274 Lifetime: &durationpb.Duration{ 275 Seconds: int64(accountLifeTime.Seconds()), 276 }, 277 }, 278 } 279 } 280 281 // makeRunQuotaPolicies is a a helper to format run quota policies for the given config groups. 282 func makeRunQuotaPolicies(project string, configGroups []*prjcfg.ConfigGroup) []*quotapb.PolicyConfig_Entry { 283 var policies []*quotapb.PolicyConfig_Entry 284 285 for _, configGroup := range configGroups { 286 config := configGroup.Content 287 if config == nil { 288 continue 289 } 290 291 for _, userLimit := range config.GetUserLimits() { 292 runLimit := userLimit.GetRun() 293 if runLimit == nil { 294 continue 295 } 296 297 // limit is set to 0 when unlimited = True. The unlimited attribute 298 // will be handled by setting `IGNORE_POLICY_BOUNDS` for each 299 // quota op. 300 runLimitVal := uint64(runLimit.GetMaxActive().GetValue()) 301 polkey := runPolicyKey(config.GetName(), userLimit.GetName()) 302 303 policies = append(policies, runPolicyEntry(polkey, runLimitVal)) 304 } 305 306 // Add default run quota policy. 307 defaultRunLimit := config.GetUserLimitDefault().GetRun() 308 if defaultRunLimit == nil { 309 continue 310 } 311 312 // default limit is set to 0 when unlimited = True || not configured. 313 // Accounts using this policy will set `IGNORE_POLICY_BOUNDS` for each 314 // quota op. 315 defaultRunLimitVal := uint64(defaultRunLimit.GetMaxActive().GetValue()) 316 defaultkey := runPolicyKey(config.GetName(), defaultUserLimit) 317 318 policies = append(policies, runPolicyEntry(defaultkey, defaultRunLimitVal)) 319 } 320 321 return policies 322 } 323 324 // loadPolicies loads the given configGroups into server/quota. 325 // If the policy already exists within server/quota, this immediately returns the "apb.PolicyConfigID. 326 func (qm *Manager) loadPolicies(ctx context.Context, project string, configGroups []*prjcfg.ConfigGroup, version string) (*quotapb.PolicyConfigID, error) { 327 runQuotaPolicies := makeRunQuotaPolicies(project, configGroups) 328 if runQuotaPolicies == nil { 329 return nil, nil 330 } 331 332 // Load policies into server/quota. 333 return qm.qapp.LoadPoliciesManual(ctx, project, version, "apb.PolicyConfig{ 334 Policies: runQuotaPolicies, 335 }) 336 } 337 338 // WritePolicy writes lucicfg updates to the srvquota policies. 339 func (qm *Manager) WritePolicy(ctx context.Context, project string) (*quotapb.PolicyConfigID, error) { 340 // Get all config groups. 341 meta, err := prjcfg.GetLatestMeta(ctx, project) 342 if err != nil { 343 return nil, err 344 } 345 346 configGroups, err := meta.GetConfigGroups(ctx) 347 if err != nil { 348 return nil, err 349 } 350 351 return qm.loadPolicies(ctx, project, configGroups, meta.Hash()) 352 } 353 354 // NewManager creates a new quota manager. 355 func NewManager() *Manager { 356 qinit.Do(func() { 357 qapp = srvquota.Register(appID, &srvquota.ApplicationOptions{ 358 ResourceTypes: []string{runResource, tryjobResource}, 359 }) 360 }) 361 return &Manager{qapp: qapp} 362 }