github.com/web-platform-tests/wpt.fyi@v0.0.0-20240530210107-70cf978996f1/api/query/cache/index/index.go (about) 1 // Copyright 2018 The WPT Dashboard Project. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // nolint:godox // TODO(Hexcles): Extract type RunID to another package (shared) so that Index 6 // can be mocked into a different package without cyclic imports. 7 8 package index 9 10 import ( 11 "context" 12 "encoding/json" 13 "errors" 14 "fmt" 15 "io" 16 "math" 17 "net/http" 18 "sync" 19 20 mapset "github.com/deckarep/golang-set" 21 "github.com/web-platform-tests/wpt.fyi/api/query" 22 "github.com/web-platform-tests/wpt.fyi/api/query/cache/lru" 23 "github.com/web-platform-tests/wpt.fyi/shared" 24 "github.com/web-platform-tests/wpt.fyi/shared/metrics" 25 26 "github.com/sirupsen/logrus" 27 ) 28 29 var ( 30 errNilRun = errors.New("Test run is nil") 31 errNoQuery = errors.New("No query provided") 32 errNoRuns = errors.New("No runs") 33 errRunExists = errors.New("Run already exists in index") 34 errRunLoading = errors.New("Run currently being loaded into index") 35 errSomeShardsRequired = errors.New("Index must have at least one shard") 36 errZeroRun = errors.New("Cannot ingest run with ID of 0") 37 errEmptyReport = errors.New("Report contains no results") 38 ) 39 40 // ErrRunExists returns the error associated with an attempt to perform 41 // operations on a run currently unknown to an Index when the Index, in fact, 42 // already knows about the run. 43 func ErrRunExists() error { 44 return errRunExists 45 } 46 47 // ErrRunLoading returns the error associated with an attempt to perform 48 // operations on a run currently unknown to an Index when the Index, in fact, 49 // is currently loading data associated with the run. 50 func ErrRunLoading() error { 51 return errRunLoading 52 } 53 54 // Index is an index of test run results that can ingest and evict runs. 55 type Index interface { 56 query.Binder 57 58 // Run loads the metadata associated with the given RunID value. It returns 59 // an error if the Index does not understand the given RunID value. 60 Run(RunID) (shared.TestRun, error) 61 // Runs loads the metadata associated with the given RunID values. It returns 62 // an error if the Index does not understand one or more of the given RunID 63 // values. 64 Runs([]RunID) ([]shared.TestRun, error) 65 // IngestRun loads the test run results associated with the input test run 66 // into the index. 67 IngestRun(shared.TestRun) error 68 // EvictRuns reduces memory pressure by evicting the cache's choice of runs 69 // from memory. The parameter is a percentage of current runs to evict. 70 EvictRuns(float64) (int, error) 71 // SetIndexChan sets the channel that synchronizes before ingesting a run. 72 // This channel is used by index monitors to ensure that the monitor is 73 // scheduled to run frequently enough to keep pace with any influx of ingested 74 // runs. 75 SetIngestChan(chan bool) 76 } 77 78 // ProxyIndex is a proxy implementation of the Index interface. This type is 79 // generally used in type embeddings that wish to override the behaviour of some 80 // (but not all) methods, deferring to the delegate for all other behaviours. 81 type ProxyIndex struct { 82 delegate Index 83 } 84 85 // Run loads the metadata for the given run ID value by deferring to the 86 // proxy's delegate. 87 func (i *ProxyIndex) Run(id RunID) (shared.TestRun, error) { 88 return i.delegate.Run(id) 89 } 90 91 // Runs loads the metadata for the given run ID values by deferring to the 92 // proxy's delegate. 93 func (i *ProxyIndex) Runs(ids []RunID) ([]shared.TestRun, error) { 94 return i.delegate.Runs(ids) 95 } 96 97 // IngestRun loads the given run's results in to the index by deferring to the 98 // proxy's delegate. 99 func (i *ProxyIndex) IngestRun(r shared.TestRun) error { 100 return i.delegate.IngestRun(r) 101 } 102 103 // EvictRuns deletes percent% runs from the index by deferring to the proxy's 104 // delegate. 105 func (i *ProxyIndex) EvictRuns(percent float64) (int, error) { 106 return i.delegate.EvictRuns(percent) 107 } 108 109 // SetIngestChan sets the channel that synchronizes before ingesting a run by 110 // deferring to the proxy's delegate. 111 func (i *ProxyIndex) SetIngestChan(c chan bool) { 112 i.delegate.SetIngestChan(c) 113 } 114 115 // NewProxyIndex instantiates a new proxy index bound to the given delegate. 116 func NewProxyIndex(idx Index) ProxyIndex { 117 return ProxyIndex{idx} 118 } 119 120 // ReportLoader handles loading a WPT test results report based on metadata in 121 // a shared.TestRun. 122 type ReportLoader interface { 123 Load(shared.TestRun) (*metrics.TestResultsReport, error) 124 } 125 126 // shardedWPTIndex is an Index that manages test and result data across mutually 127 // exclusive shards. 128 type shardedWPTIndex struct { 129 runs map[RunID]shared.TestRun 130 lru lru.LRU 131 inFlight mapset.Set 132 loader ReportLoader 133 shards []*wptIndex 134 m *sync.RWMutex 135 c chan bool 136 } 137 138 // wptIndex is an index of tests and results. Multicore machines should use 139 // shardedWPTIndex, which embed a slice of wptIndex containing mutually 140 // exclusive subsets of test and result data. 141 type wptIndex struct { 142 tests Tests 143 results Results 144 m *sync.RWMutex 145 } 146 147 // testData is a wrapper for a single unit of test+result data from a test run. 148 type testData struct { 149 testName 150 ResultID 151 } 152 153 // HTTPReportLoader loads WPT test run reports from the URL specified in test 154 // run metadata. 155 type HTTPReportLoader struct{} 156 157 func (i *shardedWPTIndex) Run(id RunID) (shared.TestRun, error) { 158 return i.syncGetRun(id) 159 } 160 161 func (i *shardedWPTIndex) Runs(ids []RunID) ([]shared.TestRun, error) { 162 return i.syncGetRuns(ids) 163 } 164 165 func (i *shardedWPTIndex) IngestRun(r shared.TestRun) error { 166 // Error cases: ID cannot be 0, run cannot be loaded or loading-in-progress. 167 if r.ID == 0 { 168 return errZeroRun 169 } 170 171 // Synchronize with anything that may be monitoring run ingestion. Do this 172 // before any i.sync* routines to avoid deadlock. 173 if i.c != nil { 174 i.c <- true 175 } 176 177 if err := i.syncMarkInProgress(r); err != nil { 178 return err 179 } 180 defer func() { 181 if err := i.syncClearInProgress(r); err != nil { 182 logrus.Warningf("Sync clear error: %s", err.Error()) 183 } 184 }() 185 186 // Delegate loader to construct complete run report. 187 report, err := i.loader.Load(r) 188 if err != nil && !errors.Is(err, errEmptyReport) { 189 return err 190 } 191 192 // Results of different tests will be stored in different shards, based on the 193 // top-level test (i.e., not subtests) integral ID of each test in the report. 194 // 195 // Create RunResults for each shard's partition of this run's results. 196 numShards := len(i.shards) 197 numShardsU64 := uint64(numShards) 198 shardData := make([]map[TestID]testData, numShards) 199 for j := 0; j < numShards; j++ { 200 shardData[j] = make(map[TestID]testData) 201 } 202 203 for _, res := range report.Results { 204 // Add top-level test (i.e., not subtest) result to appropriate shard. 205 t, err := computeTestID(res.Test, nil) 206 if err != nil { 207 return err 208 } 209 210 shardIdx := int(t.testID % numShardsU64) 211 dataForShard := shardData[shardIdx] 212 re := ResultID(shared.TestStatusValueFromString(res.Status)) 213 dataForShard[t] = testData{ 214 testName: testName{ 215 name: res.Test, 216 subName: nil, 217 }, 218 ResultID: re, 219 } 220 221 // Dedup subtests, warning when subtest names are duplicated. 222 subs := make(map[string]metrics.SubTest) 223 for _, sub := range res.Subtests { 224 if _, ok := subs[sub.Name]; ok { 225 logrus.Warningf("Duplicate subtests with the same name: %s %s", res.Test, sub.Name) 226 227 continue 228 } 229 subs[sub.Name] = sub 230 } 231 232 // Add each subtests' result to the appropriate shard (same shard as 233 // top-level test). 234 for i := range subs { 235 name := subs[i].Name 236 t, err := computeTestID(res.Test, &name) 237 if err != nil { 238 return err 239 } 240 241 re := ResultID(shared.TestStatusValueFromString(subs[i].Status)) 242 dataForShard[t] = testData{ 243 testName: testName{ 244 name: res.Test, 245 subName: &name, 246 }, 247 ResultID: re, 248 } 249 } 250 } 251 252 if err := i.syncStoreRun(r, shardData); err != nil { 253 logrus.Warningf("Sync store run error: %s", err.Error()) 254 } 255 256 return nil 257 } 258 259 func (i *shardedWPTIndex) EvictRuns(percent float64) (int, error) { 260 return i.syncEvictRuns(math.Max(0.0, math.Min(1.0, percent))) 261 } 262 263 // nolint:ireturn // TODO: Fix ireturn lint error 264 func (i *shardedWPTIndex) Bind(runs []shared.TestRun, q query.ConcreteQuery) (query.Plan, error) { 265 if len(runs) == 0 { 266 return nil, errNoRuns 267 } else if q == nil { 268 return nil, errNoQuery 269 } 270 271 ids := make([]RunID, len(runs)) 272 for j, run := range runs { 273 ids[j] = RunID(run.ID) 274 } 275 idxs, err := i.syncExtractRuns(ids) 276 if err != nil { 277 return nil, err 278 } 279 280 fs := make(ShardedFilter, len(idxs)) 281 for j, idx := range idxs { 282 f, err := newFilter(idx, q) 283 if err != nil { 284 return nil, err 285 } 286 fs[j] = f 287 } 288 289 return fs, nil 290 } 291 292 func (i *shardedWPTIndex) SetIngestChan(c chan bool) { 293 i.c = c 294 } 295 296 // Load for HTTPReportLoader loads WPT test run reports from the URL specified 297 // in test run metadata. 298 func (l HTTPReportLoader) Load(run shared.TestRun) (*metrics.TestResultsReport, error) { 299 // Attempt to fetch-and-unmarshal run from run.RawResultsURL. 300 req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, run.RawResultsURL, nil) 301 if err != nil { 302 return nil, fmt.Errorf("failed to create GET request for Results URL: %w", err) 303 } 304 resp, err := http.DefaultClient.Do(req) 305 if err != nil { 306 return nil, err 307 } 308 defer resp.Body.Close() 309 if resp.StatusCode != http.StatusOK { 310 err = fmt.Errorf(`Non-OK HTTP status code of %d from "%s" for run ID=%d`, resp.StatusCode, run.RawResultsURL, run.ID) 311 312 return nil, err 313 } 314 data, err := io.ReadAll(resp.Body) 315 if err != nil { 316 return nil, err 317 } 318 var report metrics.TestResultsReport 319 err = json.Unmarshal(data, &report) 320 if err != nil { 321 return nil, err 322 } 323 if len(report.Results) == 0 { 324 return &report, errEmptyReport 325 } 326 327 return &report, nil 328 } 329 330 // NewShardedWPTIndex creates a new empty Index for WPT test run results. 331 // nolint:ireturn // TODO: Fix ireturn lint error 332 func NewShardedWPTIndex(loader ReportLoader, numShards int) (Index, error) { 333 if numShards <= 0 { 334 return nil, errSomeShardsRequired 335 } 336 337 shards := make([]*wptIndex, 0, numShards) 338 for i := 0; i < numShards; i++ { 339 tests := NewTests() 340 shards = append(shards, newWPTIndex(tests)) 341 } 342 343 // nolint:exhaustruct // TODO: Fix exhaustruct lint error. 344 return &shardedWPTIndex{ 345 runs: make(map[RunID]shared.TestRun), 346 lru: lru.NewLRU(), 347 inFlight: mapset.NewSet(), 348 loader: loader, 349 shards: shards, 350 m: &sync.RWMutex{}, 351 }, nil 352 } 353 354 // NewReportLoader constructs a loader that loads result reports over HTTP from 355 // a shared.TestRun.RawResultsURL. 356 // nolint:ireturn // TODO: Fix ireturn lint error 357 func NewReportLoader() ReportLoader { 358 return HTTPReportLoader{} 359 } 360 361 func (i *shardedWPTIndex) syncGetRun(id RunID) (shared.TestRun, error) { 362 i.m.RLock() 363 defer i.m.RUnlock() 364 365 run, loaded := i.runs[id] 366 if !loaded { 367 return shared.TestRun{}, fmt.Errorf("Unknown run ID: %v", id) 368 } 369 370 return run, nil 371 } 372 373 func (i *shardedWPTIndex) syncGetRuns(ids []RunID) ([]shared.TestRun, error) { 374 i.m.RLock() 375 defer i.m.RUnlock() 376 377 runs := make([]shared.TestRun, len(ids)) 378 for j := range ids { 379 run, ok := i.runs[ids[j]] 380 if !ok { 381 return nil, fmt.Errorf("Unknown run ID: %v", ids[j]) 382 } 383 384 runs[j] = run 385 } 386 387 return runs, nil 388 } 389 390 func (i *shardedWPTIndex) syncMarkInProgress(run shared.TestRun) error { 391 i.m.Lock() 392 defer i.m.Unlock() 393 394 id := RunID(run.ID) 395 _, loaded := i.runs[id] 396 if loaded { 397 return errRunExists 398 } 399 if i.inFlight.Contains(id) { 400 return errRunLoading 401 } 402 403 i.inFlight.Add(id) 404 405 return nil 406 } 407 408 func (i *shardedWPTIndex) syncClearInProgress(run shared.TestRun) error { 409 i.m.Lock() 410 defer i.m.Unlock() 411 412 id := RunID(run.ID) 413 if !i.inFlight.Contains(id) { 414 return errNilRun 415 } 416 417 i.inFlight.Remove(id) 418 419 return nil 420 } 421 422 func (i *shardedWPTIndex) syncStoreRun(run shared.TestRun, data []map[TestID]testData) error { 423 i.m.Lock() 424 defer i.m.Unlock() 425 426 id := RunID(run.ID) 427 for j, shardData := range data { 428 if err := syncStoreRunOnShard(i.shards[j], id, shardData); err != nil { 429 return err 430 } 431 } 432 i.runs[id] = run 433 i.lru.Access(int64(id)) 434 435 return nil 436 } 437 438 func syncStoreRunOnShard(shard *wptIndex, id RunID, shardData map[TestID]testData) error { 439 shard.m.Lock() 440 defer shard.m.Unlock() 441 442 runResults := NewRunResults() 443 for t, data := range shardData { 444 shard.tests.Add(t, data.testName.name, data.testName.subName) 445 runResults.Add(data.ResultID, t) 446 } 447 448 return shard.results.Add(id, runResults) 449 } 450 451 func (i *shardedWPTIndex) syncEvictRuns(percent float64) (int, error) { 452 i.m.Lock() 453 defer i.m.Unlock() 454 455 if len(i.runs) == 0 { 456 return 0, errNoRuns 457 } 458 459 runIDs := i.lru.EvictLRU(percent) 460 if len(runIDs) == 0 { 461 return 0, errNoRuns 462 } 463 464 for _, runID := range runIDs { 465 id := RunID(runID) 466 467 // Delete data from shards, and from runs collection. 468 for _, shard := range i.shards { 469 if err := syncDeleteResultsFromShard(shard, id); err != nil { 470 return 0, err 471 } 472 } 473 delete(i.runs, id) 474 } 475 476 return len(runIDs), nil 477 } 478 479 func syncDeleteResultsFromShard(shard *wptIndex, id RunID) error { 480 shard.m.Lock() 481 defer shard.m.Unlock() 482 483 return shard.results.Delete(id) 484 } 485 486 func (i *shardedWPTIndex) syncExtractRuns(ids []RunID) ([]index, error) { 487 i.m.RLock() 488 defer i.m.RUnlock() 489 490 idxs := make([]index, len(i.shards)) 491 var err error 492 for j, shard := range i.shards { 493 idxs[j], err = syncMakeIndex(shard, ids) 494 if err != nil { 495 return nil, err 496 } 497 } 498 499 for _, id := range ids { 500 i.lru.Access(int64(id)) 501 } 502 503 return idxs, nil 504 } 505 506 func syncMakeIndex(shard *wptIndex, ids []RunID) (index, error) { 507 shard.m.RLock() 508 defer shard.m.RUnlock() 509 510 tests := shard.tests 511 runResults := make(map[RunID]RunResults) 512 for _, id := range ids { 513 rrs := shard.results.ForRun(id) 514 if rrs == nil { 515 return index{}, fmt.Errorf("Run is unknown to shard: RunID=%v", id) 516 } 517 runResults[id] = shard.results.ForRun(id) 518 } 519 520 return index{ 521 tests: tests, 522 runResults: runResults, 523 m: shard.m, 524 }, nil 525 } 526 527 func newWPTIndex(tests Tests) *wptIndex { 528 return &wptIndex{ 529 tests: tests, 530 results: NewResults(), 531 m: &sync.RWMutex{}, 532 } 533 }