go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/configs/prjcfg/store.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package prjcfg
    16  
    17  import (
    18  	"context"
    19  	"crypto/sha256"
    20  	"encoding/hex"
    21  	"fmt"
    22  	"net/url"
    23  	"sort"
    24  	"strings"
    25  	"time"
    26  
    27  	"google.golang.org/protobuf/proto"
    28  
    29  	"go.chromium.org/luci/common/data/stringset"
    30  	"go.chromium.org/luci/common/errors"
    31  	"go.chromium.org/luci/common/retry/transient"
    32  	"go.chromium.org/luci/common/sync/parallel"
    33  	"go.chromium.org/luci/gae/service/datastore"
    34  
    35  	cfgpb "go.chromium.org/luci/cv/api/config/v2"
    36  )
    37  
    38  const projectConfigKind string = "ProjectConfig"
    39  
    40  // SchemaVersion is the current DS schema version.
    41  //
    42  // Bump it to force-update Project configs and their Config Groups after the
    43  // next deployment.
    44  const SchemaVersion = 1
    45  
// ProjectConfig is the root entity that keeps track of the latest version
// info of the CV config for a LUCI Project. It only contains high-level
// metadata about the config. The actual content of the config is stored in
// the `ConfigGroup` entities, which can be looked up by constructing IDs
// from the `Hash` and `ConfigGroupNames` fields (see `MakeConfigGroupID`).
type ProjectConfig struct {
	// _kind fixes the datastore kind of this entity to "ProjectConfig".
	_kind string `gae:"$kind,ProjectConfig"`
	// Project is the name of this LUCI Project. It is the entity ID.
	Project string `gae:"$id"`
	// SchemaVersion is the version of the schema.
	//
	// It is used to force-update old entities to the newest format.
	// See SchemaVersion const.
	SchemaVersion int `gae:",noindex"`
	// Enabled indicates whether CV is enabled for this LUCI Project.
	//
	// Project is disabled if it is de-registered in LUCI Config or it no longer
	// has CV config file.
	//
	// This is the only property of this entity declared without the `noindex`
	// option; the queries in this file project and filter on it.
	Enabled bool
	// UpdateTime is the timestamp when this ProjectConfig was last updated.
	UpdateTime time.Time `gae:",noindex"`
	// EVersion is the latest version number of this ProjectConfig.
	//
	// It increments by 1 every time a new config change is imported to CV for
	// this LUCI Project.
	EVersion int64 `gae:",noindex"`
	// Hash is a string computed from the content of the latest imported CV
	// Config using `ComputeHash()`.
	Hash string `gae:",noindex"`
	// ExternalHash is the hash string of this CV config in the external source
	// of truth (currently, LUCI Config). Used to quickly decide whether the
	// Config has been updated without fetching the full content.
	ExternalHash string `gae:",noindex"`
	// ConfigGroupNames are the names of all ConfigGroups in the current version
	// of CV Config.
	ConfigGroupNames []string `gae:",noindex"`
}
    83  
    84  // ComputeHash computes the hash string of given CV Config and prefixed with
    85  // hash algorithm string. (e.g. sha256:deadbeefdeadbeef)
    86  //
    87  // The hash string is an hex-encoded string of the first 8 bytes (i.e. 16
    88  // char in length) of sha256(deterministically binary serialized Config proto).
    89  // Note that, deterministic marshalling does NOT guarantee the same output
    90  // for the equal proto message  across different language or event builds.
    91  // Therefore, in worst case scenario, when a newer version of proto lib is
    92  // deployed, CV may re-ingest functionally equivalent config.
    93  // See: https://godoc.org/google.golang.org/protobuf/proto#MarshalOptions
    94  func ComputeHash(cfg *cfgpb.Config) string {
    95  	b, err := proto.MarshalOptions{Deterministic: true}.Marshal(cfg)
    96  	if err != nil {
    97  		panic(fmt.Sprintf("failed to marshal config: %s", err))
    98  	}
    99  	sha := sha256.New()
   100  	sha.Write(b)
   101  	return fmt.Sprintf("sha256:%s", hex.EncodeToString(sha.Sum(nil)[:8]))
   102  }
   103  
   104  // ProjectConfigKey returns the ProjectConfig key for a given project.
   105  func ProjectConfigKey(ctx context.Context, project string) *datastore.Key {
   106  	return datastore.MakeKey(ctx, projectConfigKind, project)
   107  }
   108  
   109  // GetAllProjectIDs returns the names of all projects available in datastore.
   110  func GetAllProjectIDs(ctx context.Context, enabledOnly bool) ([]string, error) {
   111  	var projects []*ProjectConfig
   112  	query := datastore.NewQuery(projectConfigKind).Project("Enabled")
   113  	if err := datastore.GetAll(ctx, query, &projects); err != nil {
   114  		return nil, errors.Annotate(err, "failed to query all projects").Tag(transient.Tag).Err()
   115  	}
   116  	ret := make([]string, 0, len(projects))
   117  	for _, p := range projects {
   118  		if enabledOnly && !p.Enabled {
   119  			continue
   120  		}
   121  		ret = append(ret, p.Project)
   122  	}
   123  	sort.Strings(ret)
   124  	return ret, nil
   125  }
   126  
// ConfigHashInfo stores high-level info about a ProjectConfig `Hash`.
//
// It is primarily used for cleanup purposes, to decide which `Hash` and
// its corresponding `ConfigGroup`s can be safely deleted.
type ConfigHashInfo struct {
	// _kind fixes the datastore kind of this entity to
	// "ProjectConfigHashInfo".
	_kind string `gae:"$kind,ProjectConfigHashInfo"`
	// Hash is the `Hash` of a `ProjectConfig` that CV has imported; it is the
	// entity ID. Project is the parent key of this entity (presumably the key
	// returned by `ProjectConfigKey`; confirm against the writers).
	Hash    string         `gae:"$id"`
	Project *datastore.Key `gae:"$parent"`
	// SchemaVersion is the version of the schema.
	//
	// It is used to force-update old entities to the newest format.
	// See SchemaVersion const.
	SchemaVersion int `gae:",noindex"`
	// GitRevision is the git revision (commit hash) of the imported config.
	GitRevision string `gae:",noindex"`
	// ProjectEVersion is the largest version of ProjectConfig that this `Hash`
	// maps to.
	//
	// It is possible for a ConfigHash to map to multiple EVersions (e.g. a CV
	// Config change is landed and then reverted, which results in two new
	// EVersions but only one new Hash). Only the largest EVersion matters when
	// the cleanup job runs (i.e. CV will keep the last 5 EVersions).
	ProjectEVersion int64 `gae:",noindex"`
	// UpdateTime is the timestamp when this ConfigHashInfo was last updated.
	UpdateTime time.Time `gae:",noindex"`
	// ConfigGroupNames are the names of all ConfigGroups with this `Hash`.
	ConfigGroupNames []string `gae:",noindex"`
}
   156  
   157  // ConfigGroupID is the ID for ConfigGroup Entity.
   158  //
   159  // It is in the format of "hash/name" where
   160  //   - `hash` is the `Hash` field in the containing `ProjectConfig`.
   161  //   - `name` is the value of `ConfigGroup.Name`.
   162  type ConfigGroupID string
   163  
   164  // Hash returns Hash of the corresponding project config.
   165  func (c ConfigGroupID) Hash() string {
   166  	s := string(c)
   167  	if i := strings.IndexRune(s, '/'); i >= 0 {
   168  		return s[:i]
   169  	}
   170  	panic(fmt.Errorf("invalid ConfigGroupID %q", c))
   171  }
   172  
   173  // Name returns name component only.
   174  func (c ConfigGroupID) Name() string {
   175  	s := string(c)
   176  	if i := strings.IndexRune(s, '/'); i >= 0 {
   177  		return s[i+1:]
   178  	}
   179  	panic(fmt.Errorf("invalid ConfigGroupID %q", c))
   180  }
   181  
   182  // MakeConfigGroupID creates ConfigGroupID.
   183  func MakeConfigGroupID(hash, name string) ConfigGroupID {
   184  	if name == "" {
   185  		panic(fmt.Errorf("name must be given"))
   186  	}
   187  	return ConfigGroupID(fmt.Sprintf("%s/%s", hash, name))
   188  }
   189  
// ConfigGroup is an entity that represents a ConfigGroup defined in CV config.
type ConfigGroup struct {
	// _kind fixes the datastore kind of this entity to "ProjectConfigGroup".
	// Project is the parent key; code in this file sets it to the key
	// returned by `ProjectConfigKey`. ID is the entity ID in the "hash/name"
	// format described on `ConfigGroupID`.
	_kind   string         `gae:"$kind,ProjectConfigGroup"`
	Project *datastore.Key `gae:"$parent"`
	ID      ConfigGroupID  `gae:"$id"`
	// SchemaVersion is the version of the schema.
	//
	// It is used to force-update old entities to the newest format.
	// See SchemaVersion const.
	SchemaVersion int `gae:",noindex"`
	// DrainingStartTime represents `draining_start_time` in the CV config.
	//
	// Note that this is a project-level field. Therefore, all ConfigGroups in a
	// single version of config should have the same value.
	DrainingStartTime string `gae:",noindex"`
	// SubmitOptions represents `submit_options` field in the CV config.
	//
	// Note that this is currently a project-level field. Therefore, all
	// ConfigGroups in a single version of Config should have the same value.
	SubmitOptions *cfgpb.SubmitOptions
	// Content represents a `cfgpb.ConfigGroup` proto message defined in the CV
	// config.
	Content *cfgpb.ConfigGroup
	// CQStatusHost is the URL of the CQ status app. Optional.
	//
	// Deprecated.
	// TODO(crbug/1233963): remove this field.
	CQStatusHost string `gae:",noindex"`
}
   219  
   220  // ProjectString returns LUCI Project as a string.
   221  func (c *ConfigGroup) ProjectString() string {
   222  	return c.Project.StringID()
   223  }
   224  
   225  // GetAllGerritHosts returns a map of the Gerrit hosts watched by enabled LUCI
   226  // projects.
   227  func GetAllGerritHosts(ctx context.Context) (map[string]stringset.Set, error) {
   228  	var prjs []*ProjectConfig
   229  	q := datastore.NewQuery(projectConfigKind).Eq("Enabled", true)
   230  	if err := datastore.GetAll(ctx, q, &prjs); err != nil {
   231  		return nil, transient.Tag.Apply(err)
   232  	}
   233  
   234  	ret := make(map[string]stringset.Set)
   235  	err := parallel.WorkPool(32, func(work chan<- func() error) {
   236  		for _, p := range prjs {
   237  			hosts := stringset.New(4)
   238  			ret[p.Project] = hosts
   239  			p := p
   240  			work <- func() error {
   241  				err := addGerritHosts(ctx, p, hosts)
   242  				return errors.Annotate(err, "%s: addGerritHosts", p.Project).Err()
   243  			}
   244  		}
   245  	})
   246  	return ret, err
   247  }
   248  
   249  func addGerritHosts(ctx context.Context, prj *ProjectConfig, hosts stringset.Set) error {
   250  	const chunkSize = 16
   251  	cgs := make([]*ConfigGroup, 0, chunkSize)
   252  	pck := ProjectConfigKey(ctx, prj.Project)
   253  
   254  	for offset := 0; offset < len(prj.ConfigGroupNames); offset += chunkSize {
   255  		// prepare ConfigGroup(s) upto chunkSize.
   256  		end := offset + chunkSize
   257  		if end > len(prj.ConfigGroupNames) {
   258  			end = len(prj.ConfigGroupNames)
   259  		}
   260  		cgs = cgs[:0]
   261  		for pos := offset; pos < end; pos++ {
   262  			cgs = append(cgs, &ConfigGroup{
   263  				ID:      MakeConfigGroupID(prj.Hash, prj.ConfigGroupNames[pos]),
   264  				Project: pck,
   265  			})
   266  		}
   267  		if err := datastore.Get(ctx, cgs); err != nil {
   268  			return errors.Annotate(err, "fetching ConfigGroups for %s",
   269  				strings.Join(prj.ConfigGroupNames[offset:end], ",")).Tag(transient.Tag).Err()
   270  		}
   271  
   272  		// parse and add all the hosts.
   273  		for _, cg := range cgs {
   274  			for _, repo := range cg.Content.GetGerrit() {
   275  				rawURL := repo.GetUrl()
   276  				u, err := url.Parse(rawURL)
   277  				if err != nil {
   278  					// must be a bug in the project config validator.
   279  					return errors.Annotate(err, "%s: invalid GerritURL %q",
   280  						cg.ID.Name(), rawURL).Err()
   281  				}
   282  				if u.Host == "" {
   283  					// same; must be a bug.
   284  					return errors.Reason("%s: empty GerritHost %q",
   285  						cg.ID.Name(), rawURL).Err()
   286  				}
   287  				hosts.Add(u.Host)
   288  			}
   289  		}
   290  	}
   291  	return nil
   292  }