go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/quota/manager.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package quota
    16  
    17  import (
    18  	"context"
    19  	"crypto/md5"
    20  	"encoding/hex"
    21  	"fmt"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"google.golang.org/protobuf/types/known/durationpb"
    27  
    28  	"go.chromium.org/luci/common/clock"
    29  	"go.chromium.org/luci/common/errors"
    30  	"go.chromium.org/luci/common/retry"
    31  	"go.chromium.org/luci/common/retry/transient"
    32  	"go.chromium.org/luci/server/auth"
    33  	srvquota "go.chromium.org/luci/server/quota"
    34  	"go.chromium.org/luci/server/quota/quotapb"
    35  
    36  	cfgpb "go.chromium.org/luci/cv/api/config/v2"
    37  	"go.chromium.org/luci/cv/internal/common"
    38  	"go.chromium.org/luci/cv/internal/configs/prjcfg"
    39  	"go.chromium.org/luci/cv/internal/metrics"
    40  	"go.chromium.org/luci/cv/internal/run"
    41  )
    42  
    43  var qinit sync.Once
    44  var qapp SrvQuota
    45  
    46  const (
    47  	// appID to register with quota module.
    48  	appID = "cv"
    49  
    50  	// Resource types that the quota can use.
    51  	runResource    = "runs"
    52  	tryjobResource = "tryjobs"
    53  
    54  	defaultUserLimit = "default"
    55  
    56  	// Default lifetime of a quota account.
    57  	accountLifeTime = 3 * 24 * time.Hour // 3 days
    58  )
    59  
    60  // Manager manages the quota accounts for CV users.
    61  type Manager struct {
    62  	qapp SrvQuota
    63  }
    64  
    65  // SrvQuota manages quota
    66  type SrvQuota interface {
    67  	LoadPoliciesManual(ctx context.Context, realm string, version string, cfg *quotapb.PolicyConfig) (*quotapb.PolicyConfigID, error)
    68  	AccountID(realm, namespace, name, resourceType string) *quotapb.AccountID
    69  }
    70  
    71  // DebitRunQuota debits the run quota from a given user's account.
    72  func (qm *Manager) DebitRunQuota(ctx context.Context, r *run.Run) (*quotapb.OpResult, *cfgpb.UserLimit, error) {
    73  	return qm.runQuotaOp(ctx, r, "debit", -1)
    74  }
    75  
    76  // CreditRunQuota credits the run quota into a given user's account.
    77  func (qm *Manager) CreditRunQuota(ctx context.Context, r *run.Run) (*quotapb.OpResult, *cfgpb.UserLimit, error) {
    78  	return qm.runQuotaOp(ctx, r, "credit", 1)
    79  }
    80  
    81  // DebitTryjobQuota debits the tryjob quota from a given user's account.
    82  func (qm *Manager) DebitTryjobQuota(ctx context.Context) (*quotapb.OpResult, error) {
    83  	return nil, nil
    84  }
    85  
    86  // CreditTryjobQuota credits the tryjob quota into a given user's account.
    87  func (qm *Manager) CreditTryjobQuota(ctx context.Context) (*quotapb.OpResult, error) {
    88  	return nil, nil
    89  }
    90  
    91  // RunQuotaAccountID returns the account id of the run quota for the given run.
    92  func (qm *Manager) RunQuotaAccountID(r *run.Run) *quotapb.AccountID {
    93  	// The email is hashed using md5 for the account id to avoid PII.
    94  	emailHash := md5.Sum([]byte(r.BilledTo.Email()))
    95  	return qm.qapp.AccountID(r.ID.LUCIProject(), r.ConfigGroupID.Name(), hex.EncodeToString(emailHash[:]), runResource)
    96  }
    97  
    98  // runQuotaOp updates the run quota for the given run state by the given delta.
    99  func (qm *Manager) runQuotaOp(ctx context.Context, r *run.Run, opID string, delta int64) (*quotapb.OpResult, *cfgpb.UserLimit, error) {
   100  	userLimit, err := findRunLimit(ctx, r)
   101  
   102  	// userLimit == nil when no run user limit is configured for this user.
   103  	if err != nil || userLimit == nil {
   104  		return nil, nil, err
   105  	}
   106  
   107  	policyID := &quotapb.PolicyID{
   108  		Config: policyConfigID(r.ID.LUCIProject(), r.ConfigGroupID.Hash()),
   109  		Key:    runPolicyKey(r.ConfigGroupID.Name(), userLimit.GetName()),
   110  	}
   111  
   112  	// When policy is set to unlimited, the op is applied with IGNORE_POLICY_BOUNDS.
   113  	options := quotapb.Op_WITH_POLICY_LIMIT_DELTA
   114  	if userLimit.GetRun().GetMaxActive().GetUnlimited() {
   115  		options |= quotapb.Op_IGNORE_POLICY_BOUNDS
   116  	}
   117  
   118  	quotaOp := []*quotapb.Op{
   119  		{
   120  			AccountId:  qm.RunQuotaAccountID(r),
   121  			PolicyId:   policyID,
   122  			RelativeTo: quotapb.Op_CURRENT_BALANCE,
   123  			Delta:      delta,
   124  			Options:    uint32(options),
   125  		},
   126  	}
   127  
   128  	// When server/quota does not have the policyId already, rewrite the policy and retry the op.
   129  	var opResponse *quotapb.ApplyOpsResponse
   130  	err = retry.Retry(clock.Tag(ctx, common.LaunchRetryClockTag), makeRetryFactory(), func() (err error) {
   131  		opResponse, err = srvquota.ApplyOps(ctx, requestID(r.ID, opID), durationpb.New(accountLifeTime), quotaOp)
   132  		if errors.Unwrap(err) == srvquota.ErrQuotaApply && opResponse.Results[0].Status == quotapb.OpResult_ERR_UNKNOWN_POLICY {
   133  			if _, err := qm.WritePolicy(ctx, r.ID.LUCIProject()); err != nil {
   134  				return err
   135  			}
   136  
   137  			return errors.Annotate(err, "ApplyOps: ERR_UNKNOWN_POLICY").Tag(transient.Tag).Err()
   138  		}
   139  
   140  		return
   141  	}, nil)
   142  
   143  	if err == nil || errors.Unwrap(err) == srvquota.ErrQuotaApply {
   144  		metrics.Internal.QuotaOp.Add(
   145  			ctx,
   146  			1,
   147  			r.ID.LUCIProject(),
   148  			r.ConfigGroupID.Name(),
   149  			policyID.GetKey().GetName(),
   150  			runResource,
   151  			opID,
   152  			opResponse.Results[0].Status.String(),
   153  		)
   154  
   155  		// On ErrQuotaApply, OpResult.Status stores the reason for failure.
   156  		return opResponse.Results[0], userLimit, err
   157  	}
   158  
   159  	metrics.Internal.QuotaOp.Add(
   160  		ctx,
   161  		1,
   162  		r.ID.LUCIProject(),
   163  		r.ConfigGroupID.Name(),
   164  		policyID.GetKey().GetName(),
   165  		runResource,
   166  		opID,
   167  		"UNKNOWN_ERROR",
   168  	)
   169  
   170  	return nil, userLimit, err
   171  }
   172  
   173  func makeRetryFactory() retry.Factory {
   174  	return transient.Only(func() retry.Iterator {
   175  		return &retry.ExponentialBackoff{
   176  			Limited: retry.Limited{
   177  				Delay:   100 * time.Millisecond,
   178  				Retries: 3,
   179  			},
   180  			Multiplier: 2,
   181  		}
   182  	})
   183  }
   184  
   185  // requestID constructs the idempotent requestID for the quota operation.
   186  func requestID(runID common.RunID, op string) string {
   187  	return string(runID) + "/" + op
   188  }
   189  
   190  // findRunLimit identifies the UserLimit to use for the given run.
   191  func findRunLimit(ctx context.Context, r *run.Run) (*cfgpb.UserLimit, error) {
   192  	project := r.ID.LUCIProject()
   193  	cfgGroup, err := prjcfg.GetConfigGroup(ctx, project, r.ConfigGroupID)
   194  	if err != nil {
   195  		return nil, err
   196  	}
   197  
   198  	config := cfgGroup.Content
   199  	if config == nil {
   200  		return nil, fmt.Errorf("cannot find cfgGroup content")
   201  	}
   202  
   203  	user := r.BilledTo
   204  	for _, userLimit := range config.GetUserLimits() {
   205  		if userLimit.GetRun() == nil {
   206  			continue
   207  		}
   208  
   209  		var groups []string
   210  		for _, principal := range userLimit.GetPrincipals() {
   211  			switch parts := strings.SplitN(principal, ":", 2); {
   212  			case len(parts) != 2:
   213  				// Each entry can be either an identity string "user:<email>" or a LUCI group reference "group:<name>".
   214  				return nil, fmt.Errorf("improper format for principal: %s", principal)
   215  			case parts[0] == "user" && parts[1] == user.Email():
   216  				return userLimit, nil
   217  			case parts[0] == "group":
   218  				groups = append(groups, parts[1])
   219  			}
   220  		}
   221  
   222  		if len(groups) == 0 {
   223  			continue
   224  		}
   225  
   226  		switch result, err := auth.GetState(ctx).DB().IsMember(ctx, user, groups); {
   227  		case err != nil:
   228  			return nil, err
   229  		case result:
   230  			return userLimit, nil
   231  		}
   232  	}
   233  
   234  	// Check default run limit if user is not a part of any defined user limit groups.
   235  	if config.GetUserLimitDefault().GetRun() != nil {
   236  		userLimit := config.GetUserLimitDefault()
   237  
   238  		// Override default userLimit name to `defaultUserLimit`. The name field
   239  		// is assumed to be `optional` and hence standardized to
   240  		// `defaultUserLimit`. This field is used by policyId.
   241  		userLimit.Name = defaultUserLimit
   242  		return userLimit, nil
   243  	}
   244  
   245  	// No limits configured for this user.
   246  	return nil, nil
   247  }
   248  
   249  // policyConfigID is a helper to generate quota policyConfigID.
   250  func policyConfigID(realm, version string) *quotapb.PolicyConfigID {
   251  	return &quotapb.PolicyConfigID{
   252  		AppId:   appID,
   253  		Realm:   realm,
   254  		Version: version,
   255  	}
   256  }
   257  
   258  // runPolicyKey is a helper to generate run quota policy key.
   259  func runPolicyKey(configName, name string) *quotapb.PolicyKey {
   260  	return &quotapb.PolicyKey{
   261  		Namespace:    configName,
   262  		Name:         name,
   263  		ResourceType: runResource,
   264  	}
   265  }
   266  
   267  // runPolicyEntry is a helper to generate a run quota policy entry.
   268  func runPolicyEntry(polkey *quotapb.PolicyKey, limit uint64) *quotapb.PolicyConfig_Entry {
   269  	return &quotapb.PolicyConfig_Entry{
   270  		Key: polkey,
   271  		Policy: &quotapb.Policy{
   272  			Default: limit,
   273  			Limit:   limit,
   274  			Lifetime: &durationpb.Duration{
   275  				Seconds: int64(accountLifeTime.Seconds()),
   276  			},
   277  		},
   278  	}
   279  }
   280  
   281  // makeRunQuotaPolicies is a a helper to format run quota policies for the given config groups.
   282  func makeRunQuotaPolicies(project string, configGroups []*prjcfg.ConfigGroup) []*quotapb.PolicyConfig_Entry {
   283  	var policies []*quotapb.PolicyConfig_Entry
   284  
   285  	for _, configGroup := range configGroups {
   286  		config := configGroup.Content
   287  		if config == nil {
   288  			continue
   289  		}
   290  
   291  		for _, userLimit := range config.GetUserLimits() {
   292  			runLimit := userLimit.GetRun()
   293  			if runLimit == nil {
   294  				continue
   295  			}
   296  
   297  			// limit is set to 0 when unlimited = True. The unlimited attribute
   298  			// will be handled by setting `IGNORE_POLICY_BOUNDS` for each
   299  			// quota op.
   300  			runLimitVal := uint64(runLimit.GetMaxActive().GetValue())
   301  			polkey := runPolicyKey(config.GetName(), userLimit.GetName())
   302  
   303  			policies = append(policies, runPolicyEntry(polkey, runLimitVal))
   304  		}
   305  
   306  		// Add default run quota policy.
   307  		defaultRunLimit := config.GetUserLimitDefault().GetRun()
   308  		if defaultRunLimit == nil {
   309  			continue
   310  		}
   311  
   312  		// default limit is set to 0 when unlimited = True || not configured.
   313  		// Accounts using this policy will set `IGNORE_POLICY_BOUNDS` for each
   314  		// quota op.
   315  		defaultRunLimitVal := uint64(defaultRunLimit.GetMaxActive().GetValue())
   316  		defaultkey := runPolicyKey(config.GetName(), defaultUserLimit)
   317  
   318  		policies = append(policies, runPolicyEntry(defaultkey, defaultRunLimitVal))
   319  	}
   320  
   321  	return policies
   322  }
   323  
   324  // loadPolicies loads the given configGroups into server/quota.
   325  // If the policy already exists within server/quota, this immediately returns the &quotapb.PolicyConfigID.
   326  func (qm *Manager) loadPolicies(ctx context.Context, project string, configGroups []*prjcfg.ConfigGroup, version string) (*quotapb.PolicyConfigID, error) {
   327  	runQuotaPolicies := makeRunQuotaPolicies(project, configGroups)
   328  	if runQuotaPolicies == nil {
   329  		return nil, nil
   330  	}
   331  
   332  	// Load policies into server/quota.
   333  	return qm.qapp.LoadPoliciesManual(ctx, project, version, &quotapb.PolicyConfig{
   334  		Policies: runQuotaPolicies,
   335  	})
   336  }
   337  
   338  // WritePolicy writes lucicfg updates to the srvquota policies.
   339  func (qm *Manager) WritePolicy(ctx context.Context, project string) (*quotapb.PolicyConfigID, error) {
   340  	// Get all config groups.
   341  	meta, err := prjcfg.GetLatestMeta(ctx, project)
   342  	if err != nil {
   343  		return nil, err
   344  	}
   345  
   346  	configGroups, err := meta.GetConfigGroups(ctx)
   347  	if err != nil {
   348  		return nil, err
   349  	}
   350  
   351  	return qm.loadPolicies(ctx, project, configGroups, meta.Hash())
   352  }
   353  
   354  // NewManager creates a new quota manager.
   355  func NewManager() *Manager {
   356  	qinit.Do(func() {
   357  		qapp = srvquota.Register(appID, &srvquota.ApplicationOptions{
   358  			ResourceTypes: []string{runResource, tryjobResource},
   359  		})
   360  	})
   361  	return &Manager{qapp: qapp}
   362  }