go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/state/span.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package state 16 17 import ( 18 "context" 19 "encoding/hex" 20 "fmt" 21 "math" 22 "math/big" 23 "strings" 24 "time" 25 26 "cloud.google.com/go/spanner" 27 28 "go.chromium.org/luci/common/errors" 29 "go.chromium.org/luci/server/span" 30 31 "go.chromium.org/luci/analysis/internal/clustering" 32 cpb "go.chromium.org/luci/analysis/internal/clustering/proto" 33 "go.chromium.org/luci/analysis/internal/clustering/rules" 34 "go.chromium.org/luci/analysis/internal/config" 35 spanutil "go.chromium.org/luci/analysis/internal/span" 36 "go.chromium.org/luci/analysis/pbutil" 37 ) 38 39 // Entry represents the clustering state of a chunk, consisting of: 40 // - Metadata about what test results were clustered. 41 // - Metadata about how the test results were clustered (the algorithms 42 // and failure association rules used). 43 // - The clusters each test result are in. 44 type Entry struct { 45 // Project is the LUCI Project the chunk belongs to. 46 Project string 47 // ChunkID is the identity of the chunk of test results. 32 lowercase hexadecimal 48 // characters assigned by the ingestion process. 49 ChunkID string 50 // PartitionTime is the start of the retention period of the test results in the chunk. 51 PartitionTime time.Time 52 // ObjectID is the identity of the object in GCS containing the chunk's test results. 53 // 32 lowercase hexadecimal characters. 54 ObjectID string 55 // Clustering describes the latest clustering of test results in 56 // the chunk. 57 Clustering clustering.ClusterResults 58 // LastUpdated is the Spanner commit time the row was last updated. Output only. 59 LastUpdated time.Time 60 } 61 62 // NotFound is the error returned by Read if the row could not be found. 63 var NotFoundErr = errors.New("clustering state row not found") 64 65 // EndOfTable is the highest possible chunk ID that can be stored. 66 var EndOfTable = strings.Repeat("ff", 16) 67 68 // Create inserts clustering state for a chunk. Must be 69 // called in the context of a Spanner transaction. 70 func Create(ctx context.Context, e *Entry) error { 71 if err := validateEntry(e); err != nil { 72 return err 73 } 74 clusters, err := encodeClusters(e.Clustering.Algorithms, e.Clustering.Clusters) 75 if err != nil { 76 return err 77 } 78 ms := spanutil.InsertMap("ClusteringState", map[string]any{ 79 "Project": e.Project, 80 "ChunkID": e.ChunkID, 81 "PartitionTime": e.PartitionTime, 82 "ObjectID": e.ObjectID, 83 "AlgorithmsVersion": e.Clustering.AlgorithmsVersion, 84 "ConfigVersion": e.Clustering.ConfigVersion, 85 "RulesVersion": e.Clustering.RulesVersion, 86 "Clusters": clusters, 87 "LastUpdated": spanner.CommitTimestamp, 88 }) 89 span.BufferWrite(ctx, ms) 90 return nil 91 } 92 93 // ChunkKey represents the identify of a chunk. 94 type ChunkKey struct { 95 Project string 96 ChunkID string 97 } 98 99 // String returns a string representation of the key, for use in 100 // dictionaries. 101 func (k ChunkKey) String() string { 102 return fmt.Sprintf("%q/%q", k.Project, k.ChunkID) 103 } 104 105 // ReadLastUpdated reads the last updated time of the specified chunks. 106 // If the chunk does not exist, the zero time value time.Time{} is returned. 107 // Unless an error is returned, the returned slice will be of the same length 108 // as chunkIDs. The i-th LastUpdated time returned will correspond 109 // to the i-th chunk ID requested. 110 func ReadLastUpdated(ctx context.Context, keys []ChunkKey) ([]time.Time, error) { 111 var ks []spanner.Key 112 for _, key := range keys { 113 ks = append(ks, spanner.Key{key.Project, key.ChunkID}) 114 } 115 116 results := make(map[string]time.Time) 117 columns := []string{"Project", "ChunkID", "LastUpdated"} 118 it := span.Read(ctx, "ClusteringState", spanner.KeySetFromKeys(ks...), columns) 119 err := it.Do(func(r *spanner.Row) error { 120 var project string 121 var chunkID string 122 var lastUpdated time.Time 123 if err := r.Columns(&project, &chunkID, &lastUpdated); err != nil { 124 return errors.Annotate(err, "read clustering state row").Err() 125 } 126 key := ChunkKey{project, chunkID} 127 results[key.String()] = lastUpdated 128 return nil 129 }) 130 if err != nil { 131 return nil, err 132 } 133 result := make([]time.Time, len(keys)) 134 for i, key := range keys { 135 // If an entry does not exist in results, this will set the 136 // default value for *time.Time, which is nil. 137 result[i] = results[key.String()] 138 } 139 return result, nil 140 } 141 142 // UpdateClustering updates the clustering results on a chunk. 143 // 144 // To avoid clobbering other concurrent updates, the caller should read 145 // the LastUpdated time of the chunk in the same transaction as it is 146 // updated (i.e. using ReadLastUpdated) and verify it matches the previous 147 // entry passed. 148 // 149 // The update uses the previous entry to avoid writing cluster data 150 // if it has not changed, which optimises the performance of minor 151 // reclusterings. 152 func UpdateClustering(ctx context.Context, previous *Entry, update *clustering.ClusterResults) error { 153 if err := validateClusterResults(update); err != nil { 154 return err 155 } 156 157 upd := make(map[string]any) 158 upd["Project"] = previous.Project 159 upd["ChunkID"] = previous.ChunkID 160 upd["LastUpdated"] = spanner.CommitTimestamp 161 upd["AlgorithmsVersion"] = update.AlgorithmsVersion 162 upd["ConfigVersion"] = update.ConfigVersion 163 upd["RulesVersion"] = update.RulesVersion 164 165 if !clustering.AlgorithmsAndClustersEqual(&previous.Clustering, update) { 166 // Clusters is a field that may be many kilobytes in size. 167 // For efficiency, only write it to Spanner if it is changed. 168 clusters, err := encodeClusters(update.Algorithms, update.Clusters) 169 if err != nil { 170 return err 171 } 172 upd["Clusters"] = clusters 173 } 174 175 span.BufferWrite(ctx, spanutil.UpdateMap("ClusteringState", upd)) 176 return nil 177 } 178 179 // Read reads clustering state for a chunk. Must be 180 // called in the context of a Spanner transaction. If no clustering 181 // state exists, the method returns the error NotFound. 182 func Read(ctx context.Context, project, chunkID string) (*Entry, error) { 183 whereClause := "ChunkID = @chunkID" 184 params := make(map[string]any) 185 params["chunkID"] = chunkID 186 187 limit := 1 188 results, err := readWhere(ctx, project, whereClause, params, limit) 189 if err != nil { 190 return nil, err 191 } 192 if len(results) == 0 { 193 // Row does not exist. 194 return nil, NotFoundErr 195 } 196 return results[0], nil 197 } 198 199 // ReadNextOptions specifies options for ReadNextN. 200 type ReadNextOptions struct { 201 // The exclusive lower bound of the range of ChunkIDs to read. 202 // To read from the start of the table, leave this blank (""). 203 StartChunkID string 204 // The inclusive upper bound of the range of ChunkIDs to read. 205 // To specify the end of the table, use the constant EndOfTable. 206 EndChunkID string 207 // The minimum AlgorithmsVersion that re-clustering wants to achieve. 208 // If a row has an AlgorithmsVersion less than this value, it will 209 // be eligble to be read. 210 AlgorithmsVersion int64 211 // The minimum ConfigVersion that re-clustering wants to achieve. 212 // If a row has an RulesVersion less than this value, it will 213 // be eligble to be read. 214 ConfigVersion time.Time 215 // The minimum RulesVersion that re-clustering wants to achieve. 216 // If a row has an RulesVersion less than this value, it will 217 // be eligble to be read. 218 RulesVersion time.Time 219 } 220 221 // ReadNextN reads the n consecutively next clustering state entries 222 // matching ReadNextOptions. 223 func ReadNextN(ctx context.Context, project string, opts ReadNextOptions, n int) ([]*Entry, error) { 224 params := make(map[string]any) 225 whereClause := ` 226 ChunkId > @startChunkID AND ChunkId <= @endChunkID 227 AND (AlgorithmsVersion < @algorithmsVersion 228 OR ConfigVersion < @configVersion 229 OR RulesVersion < @rulesVersion) 230 ` 231 params["startChunkID"] = opts.StartChunkID 232 params["endChunkID"] = opts.EndChunkID 233 params["algorithmsVersion"] = opts.AlgorithmsVersion 234 params["configVersion"] = opts.ConfigVersion 235 params["rulesVersion"] = opts.RulesVersion 236 237 return readWhere(ctx, project, whereClause, params, n) 238 } 239 240 func readWhere(ctx context.Context, project, whereClause string, params map[string]any, limit int) ([]*Entry, error) { 241 stmt := spanner.NewStatement(` 242 SELECT 243 ChunkId, PartitionTime, ObjectId, 244 AlgorithmsVersion, 245 ConfigVersion, RulesVersion, 246 LastUpdated, Clusters 247 FROM ClusteringState 248 WHERE Project = @project AND (` + whereClause + `) 249 ORDER BY ChunkId 250 LIMIT @limit 251 `) 252 for k, v := range params { 253 stmt.Params[k] = v 254 } 255 stmt.Params["project"] = project 256 stmt.Params["limit"] = limit 257 258 it := span.Query(ctx, stmt) 259 var b spanutil.Buffer 260 results := []*Entry{} 261 err := it.Do(func(r *spanner.Row) error { 262 clusters := &cpb.ChunkClusters{} 263 result := &Entry{Project: project} 264 265 err := b.FromSpanner(r, 266 &result.ChunkID, &result.PartitionTime, &result.ObjectID, 267 &result.Clustering.AlgorithmsVersion, 268 &result.Clustering.ConfigVersion, &result.Clustering.RulesVersion, 269 &result.LastUpdated, clusters) 270 if err != nil { 271 return errors.Annotate(err, "read clustering state row").Err() 272 } 273 274 result.Clustering.Algorithms, result.Clustering.Clusters, err = decodeClusters(clusters) 275 if err != nil { 276 return errors.Annotate(err, "decode clusters").Err() 277 } 278 results = append(results, result) 279 return nil 280 }) 281 if err != nil { 282 return nil, err 283 } 284 return results, nil 285 } 286 287 // ReadProjects read all distinct projects with a clustering state entry.. 288 func ReadProjects(ctx context.Context) ([]string, error) { 289 stmt := spanner.NewStatement(` 290 SELECT Project 291 FROM ClusteringState 292 GROUP BY Project 293 `) 294 it := span.Query(ctx, stmt) 295 var projects []string 296 err := it.Do(func(r *spanner.Row) error { 297 var project string 298 if err := r.Columns(&project); err != nil { 299 return errors.Annotate(err, "read project row").Err() 300 } 301 projects = append(projects, project) 302 return nil 303 }) 304 if err != nil { 305 return nil, err 306 } 307 return projects, nil 308 } 309 310 // EstimateChunks estimates the total number of chunks in the ClusteringState 311 // table for the given project. 312 func EstimateChunks(ctx context.Context, project string) (int, error) { 313 stmt := spanner.NewStatement(` 314 SELECT ChunkId 315 FROM ClusteringState 316 WHERE Project = @project 317 ORDER BY ChunkId ASC 318 LIMIT 1 OFFSET 100 319 `) 320 stmt.Params["project"] = project 321 322 it := span.Query(ctx, stmt) 323 var chunkID string 324 err := it.Do(func(r *spanner.Row) error { 325 if err := r.Columns(&chunkID); err != nil { 326 return errors.Annotate(err, "read ChunkID row").Err() 327 } 328 return nil 329 }) 330 if err != nil { 331 return 0, err 332 } 333 if chunkID == "" { 334 // There was no 100th chunk ID. There must be less 335 // than 100 chunks in the project. 336 return 99, nil 337 } 338 return estimateChunksFromID(chunkID) 339 } 340 341 // estimateChunksFromID estimates the number of chunks in a project 342 // given the ID of the 100th chunk (in ascending keyspace order) in 343 // that project. The maximum estimate that will be returned is one 344 // billion. If there is no 100th chunk ID in the project, then 345 // there are clearly 99 chunks or less in the project. 346 func estimateChunksFromID(chunkID100 string) (int, error) { 347 const MaxEstimate = 1000 * 1000 * 1000 348 // This function uses the property that ChunkIDs are approximately 349 // uniformly distributed. We use the following estimator of the 350 // number of rows: 351 // 100 / (fraction of keyspace used up to 100th row) 352 // where fraction of keyspace used up to 100th row is: 353 // (ChunkID_100th + 1) / 2^128 354 // 355 // Where ChunkID_100th is the ChunkID of the 100th row (in keyspace 356 // order), as a 128-bit integer (rather than hexadecimal string). 357 // 358 // Rearranging this estimator, we get: 359 // 100 * 2^128 / (ChunkID_100th + 1) 360 361 // numerator = 100 * 2 ^ 128 362 var numerator big.Int 363 numerator.Lsh(big.NewInt(100), 128) 364 365 idBytes, err := hex.DecodeString(chunkID100) 366 if err != nil { 367 return 0, err 368 } 369 370 // denominator = ChunkID_100th + 1. We add one because 371 // the keyspace consumed includes the ID itself. 372 var denominator big.Int 373 denominator.SetBytes(idBytes) 374 denominator.Add(&denominator, big.NewInt(1)) 375 376 // estimate = numerator / denominator. 377 var estimate big.Int 378 estimate.Div(&numerator, &denominator) 379 380 result := uint64(math.MaxUint64) 381 if estimate.IsUint64() { 382 result = estimate.Uint64() 383 } 384 if result > MaxEstimate { 385 result = MaxEstimate 386 } 387 return int(result), nil 388 } 389 390 func validateEntry(e *Entry) error { 391 if err := pbutil.ValidateProject(e.Project); err != nil { 392 return errors.Annotate(err, "project").Err() 393 } 394 switch { 395 case !clustering.ChunkRe.MatchString(e.ChunkID): 396 return fmt.Errorf("chunk ID %q is not valid", e.ChunkID) 397 case e.PartitionTime.IsZero(): 398 return errors.New("partition time must be specified") 399 case e.ObjectID == "": 400 return errors.New("object ID must be specified") 401 default: 402 if err := validateClusterResults(&e.Clustering); err != nil { 403 return err 404 } 405 return nil 406 } 407 } 408 409 func validateClusterResults(c *clustering.ClusterResults) error { 410 switch { 411 case c.AlgorithmsVersion <= 0: 412 return errors.New("algorithms version must be specified") 413 case c.ConfigVersion.Before(config.StartingEpoch): 414 return errors.New("config version must be valid") 415 case c.RulesVersion.Before(rules.StartingEpoch): 416 return errors.New("rules version must be valid") 417 default: 418 if err := validateAlgorithms(c.Algorithms); err != nil { 419 return errors.Annotate(err, "algorithms").Err() 420 } 421 if err := validateClusters(c.Clusters, c.Algorithms); err != nil { 422 return errors.Annotate(err, "clusters").Err() 423 } 424 return nil 425 } 426 } 427 428 func validateAlgorithms(algorithms map[string]struct{}) error { 429 for a := range algorithms { 430 if !clustering.AlgorithmRe.MatchString(a) { 431 return fmt.Errorf("algorithm %q is not valid", a) 432 } 433 } 434 return nil 435 } 436 437 func validateClusters(clusters [][]clustering.ClusterID, algorithms map[string]struct{}) error { 438 if len(clusters) == 0 { 439 // Each chunk must have at least one test result, even 440 // if that test result is in no clusters. 441 return errors.New("there must be clustered test results in the chunk") 442 } 443 // Outer slice has on entry per test result. 444 for i, tr := range clusters { 445 // Inner slice has the list of clusters per test result. 446 for j, c := range tr { 447 if _, ok := algorithms[c.Algorithm]; !ok { 448 return fmt.Errorf("test result %v: cluster %v: algorithm not in algorithms list: %q", i, j, c.Algorithm) 449 } 450 if err := c.ValidateIDPart(); err != nil { 451 return errors.Annotate(err, "test result %v: cluster %v: cluster ID is not valid", i, j).Err() 452 } 453 } 454 if !clustering.ClustersAreSortedNoDuplicates(tr) { 455 return fmt.Errorf("test result %v: clusters are not sorted, or there are duplicates: %v", i, tr) 456 } 457 } 458 return nil 459 }