go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/configs/prjcfg/store.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package prjcfg 16 17 import ( 18 "context" 19 "crypto/sha256" 20 "encoding/hex" 21 "fmt" 22 "net/url" 23 "sort" 24 "strings" 25 "time" 26 27 "google.golang.org/protobuf/proto" 28 29 "go.chromium.org/luci/common/data/stringset" 30 "go.chromium.org/luci/common/errors" 31 "go.chromium.org/luci/common/retry/transient" 32 "go.chromium.org/luci/common/sync/parallel" 33 "go.chromium.org/luci/gae/service/datastore" 34 35 cfgpb "go.chromium.org/luci/cv/api/config/v2" 36 ) 37 38 const projectConfigKind string = "ProjectConfig" 39 40 // SchemaVersion is the current DS schema version. 41 // 42 // Bump it to force-update Project configs and their Config Groups after the 43 // next deployment. 44 const SchemaVersion = 1 45 46 // ProjectConfig is the root entity that keeps track of the latest version 47 // info of the CV config for a LUCI Project. It only contains high-level 48 // metadata about the config. The actual content of config is stored in the 49 // `ConfigGroup` entities which can be looked up by constructing IDs using 50 // `ConfigGroupNames` field. 51 type ProjectConfig struct { 52 _kind string `gae:"$kind,ProjectConfig"` 53 // Project is the name of this LUCI Project. 54 Project string `gae:"$id"` 55 // SchemaVersion is the version of the schema. 56 // 57 // It is used to force-update old entities to newest format. 58 // See SchemaVersion const. 59 SchemaVersion int `gae:",noindex"` 60 // Enabled indicates whether CV is enabled for this LUCI Project. 61 // 62 // Project is disabled if it is de-registered in LUCI Config or it no longer 63 // has CV config file. 64 Enabled bool 65 // UpdateTime is the timestamp when this ProjectConfig was last updated. 66 UpdateTime time.Time `gae:",noindex"` 67 // EVersion is the latest version number of this ProjectConfig. 68 // 69 // It increments by 1 every time a new config change is imported to CV for 70 // this LUCI Project. 71 EVersion int64 `gae:",noindex"` 72 // Hash is a string computed from the content of latest imported CV Config 73 // using `ComputeHash()`. 74 Hash string `gae:",noindex"` 75 // ExternalHash is the hash string of this CV config in the external source 76 // of truth (currently, LUCI Config). Used to quickly decided whether the 77 // Config has been updated without fetching the full content. 78 ExternalHash string `gae:",noindex"` 79 // ConfigGroupNames are the names of all ConfigGroups in the current version 80 // of CV Config. 81 ConfigGroupNames []string `gae:",noindex"` 82 } 83 84 // ComputeHash computes the hash string of given CV Config and prefixed with 85 // hash algorithm string. (e.g. sha256:deadbeefdeadbeef) 86 // 87 // The hash string is an hex-encoded string of the first 8 bytes (i.e. 16 88 // char in length) of sha256(deterministically binary serialized Config proto). 89 // Note that, deterministic marshalling does NOT guarantee the same output 90 // for the equal proto message across different language or event builds. 91 // Therefore, in worst case scenario, when a newer version of proto lib is 92 // deployed, CV may re-ingest functionally equivalent config. 93 // See: https://godoc.org/google.golang.org/protobuf/proto#MarshalOptions 94 func ComputeHash(cfg *cfgpb.Config) string { 95 b, err := proto.MarshalOptions{Deterministic: true}.Marshal(cfg) 96 if err != nil { 97 panic(fmt.Sprintf("failed to marshal config: %s", err)) 98 } 99 sha := sha256.New() 100 sha.Write(b) 101 return fmt.Sprintf("sha256:%s", hex.EncodeToString(sha.Sum(nil)[:8])) 102 } 103 104 // ProjectConfigKey returns the ProjectConfig key for a given project. 105 func ProjectConfigKey(ctx context.Context, project string) *datastore.Key { 106 return datastore.MakeKey(ctx, projectConfigKind, project) 107 } 108 109 // GetAllProjectIDs returns the names of all projects available in datastore. 110 func GetAllProjectIDs(ctx context.Context, enabledOnly bool) ([]string, error) { 111 var projects []*ProjectConfig 112 query := datastore.NewQuery(projectConfigKind).Project("Enabled") 113 if err := datastore.GetAll(ctx, query, &projects); err != nil { 114 return nil, errors.Annotate(err, "failed to query all projects").Tag(transient.Tag).Err() 115 } 116 ret := make([]string, 0, len(projects)) 117 for _, p := range projects { 118 if enabledOnly && !p.Enabled { 119 continue 120 } 121 ret = append(ret, p.Project) 122 } 123 sort.Strings(ret) 124 return ret, nil 125 } 126 127 // ConfigHashInfo stores high-level info about a ProjectConfig `Hash`. 128 // 129 // It is primarily used for cleanup purpose to decide which `Hash` and 130 // its corresponding `ConfigGroup`s can be safely deleted. 131 type ConfigHashInfo struct { 132 _kind string `gae:"$kind,ProjectConfigHashInfo"` 133 // Hash is the `Hash` of a `ProjectConfig` that CV has imported. 134 Hash string `gae:"$id"` 135 Project *datastore.Key `gae:"$parent"` 136 // SchemaVersion is the version of the schema. 137 // 138 // It is used to force-update old entities to newest format. 139 // See SchemaVersion const. 140 SchemaVersion int `gae:",noindex"` 141 // GitRevision is the git revision (commit hash) of the imported config. 142 GitRevision string `gae:",noindex"` 143 // ProjectEVersion is largest version of ProjectConfig that this `Hash` 144 // maps to. 145 // 146 // It is possible for a ConfigHash maps to multiple EVersions (e.g. a CV 147 // Config change is landed then reverted which results in two new EVersions 148 // but only one new Hash). Only the largest EVersion matters when cleanup 149 // job runs (i.e. CV will keep the last 5 EVersions). 150 ProjectEVersion int64 `gae:",noindex"` 151 // UpdateTime is the timestamp when this ConfigHashInfo was last updated. 152 UpdateTime time.Time `gae:",noindex"` 153 // ConfigGroupNames are the names of all ConfigGroups with this `Hash`. 154 ConfigGroupNames []string `gae:",noindex"` 155 } 156 157 // ConfigGroupID is the ID for ConfigGroup Entity. 158 // 159 // It is in the format of "hash/name" where 160 // - `hash` is the `Hash` field in the containing `ProjectConfig`. 161 // - `name` is the value of `ConfigGroup.Name`. 162 type ConfigGroupID string 163 164 // Hash returns Hash of the corresponding project config. 165 func (c ConfigGroupID) Hash() string { 166 s := string(c) 167 if i := strings.IndexRune(s, '/'); i >= 0 { 168 return s[:i] 169 } 170 panic(fmt.Errorf("invalid ConfigGroupID %q", c)) 171 } 172 173 // Name returns name component only. 174 func (c ConfigGroupID) Name() string { 175 s := string(c) 176 if i := strings.IndexRune(s, '/'); i >= 0 { 177 return s[i+1:] 178 } 179 panic(fmt.Errorf("invalid ConfigGroupID %q", c)) 180 } 181 182 // MakeConfigGroupID creates ConfigGroupID. 183 func MakeConfigGroupID(hash, name string) ConfigGroupID { 184 if name == "" { 185 panic(fmt.Errorf("name must be given")) 186 } 187 return ConfigGroupID(fmt.Sprintf("%s/%s", hash, name)) 188 } 189 190 // ConfigGroup is an entity that represents a ConfigGroup defined in CV config. 191 type ConfigGroup struct { 192 _kind string `gae:"$kind,ProjectConfigGroup"` 193 Project *datastore.Key `gae:"$parent"` 194 ID ConfigGroupID `gae:"$id"` 195 // SchemaVersion is the version of the schema. 196 // 197 // It is used to force-update old entities to newest format. 198 // See SchemaVersion const. 199 SchemaVersion int `gae:",noindex"` 200 // DrainingStartTime represents `draining_start_time` in the CV config. 201 // 202 // Note that this is a project-level field. Therefore, all ConfigGroups in a 203 // single version of config should have the same value. 204 DrainingStartTime string `gae:",noindex"` 205 // SubmitOptions represents `submit_options` field in the CV config. 206 // 207 // Note that this is currently a project-level field. Therefore, all 208 // ConfigGroups in a single version of Config should have the same value. 209 SubmitOptions *cfgpb.SubmitOptions 210 // Content represents a `pb.ConfigGroup` proto message defined in the CV 211 // config 212 Content *cfgpb.ConfigGroup 213 // CQStatusHost is the URL of the CQ status app. Optional. 214 // 215 // Deprecated. 216 // TODO(crbug/1233963): remove this field. 217 CQStatusHost string `gae:",noindex"` 218 } 219 220 // ProjectString returns LUCI Project as a string. 221 func (c *ConfigGroup) ProjectString() string { 222 return c.Project.StringID() 223 } 224 225 // GetAllGerritHosts returns a map of the Gerrit hosts watched by enabled LUCI 226 // projects. 227 func GetAllGerritHosts(ctx context.Context) (map[string]stringset.Set, error) { 228 var prjs []*ProjectConfig 229 q := datastore.NewQuery(projectConfigKind).Eq("Enabled", true) 230 if err := datastore.GetAll(ctx, q, &prjs); err != nil { 231 return nil, transient.Tag.Apply(err) 232 } 233 234 ret := make(map[string]stringset.Set) 235 err := parallel.WorkPool(32, func(work chan<- func() error) { 236 for _, p := range prjs { 237 hosts := stringset.New(4) 238 ret[p.Project] = hosts 239 p := p 240 work <- func() error { 241 err := addGerritHosts(ctx, p, hosts) 242 return errors.Annotate(err, "%s: addGerritHosts", p.Project).Err() 243 } 244 } 245 }) 246 return ret, err 247 } 248 249 func addGerritHosts(ctx context.Context, prj *ProjectConfig, hosts stringset.Set) error { 250 const chunkSize = 16 251 cgs := make([]*ConfigGroup, 0, chunkSize) 252 pck := ProjectConfigKey(ctx, prj.Project) 253 254 for offset := 0; offset < len(prj.ConfigGroupNames); offset += chunkSize { 255 // prepare ConfigGroup(s) upto chunkSize. 256 end := offset + chunkSize 257 if end > len(prj.ConfigGroupNames) { 258 end = len(prj.ConfigGroupNames) 259 } 260 cgs = cgs[:0] 261 for pos := offset; pos < end; pos++ { 262 cgs = append(cgs, &ConfigGroup{ 263 ID: MakeConfigGroupID(prj.Hash, prj.ConfigGroupNames[pos]), 264 Project: pck, 265 }) 266 } 267 if err := datastore.Get(ctx, cgs); err != nil { 268 return errors.Annotate(err, "fetching ConfigGroups for %s", 269 strings.Join(prj.ConfigGroupNames[offset:end], ",")).Tag(transient.Tag).Err() 270 } 271 272 // parse and add all the hosts. 273 for _, cg := range cgs { 274 for _, repo := range cg.Content.GetGerrit() { 275 rawURL := repo.GetUrl() 276 u, err := url.Parse(rawURL) 277 if err != nil { 278 // must be a bug in the project config validator. 279 return errors.Annotate(err, "%s: invalid GerritURL %q", 280 cg.ID.Name(), rawURL).Err() 281 } 282 if u.Host == "" { 283 // same; must be a bug. 284 return errors.Reason("%s: empty GerritHost %q", 285 cg.ID.Name(), rawURL).Err() 286 } 287 hosts.Add(u.Host) 288 } 289 } 290 } 291 return nil 292 }