github.com/web-platform-tests/wpt.fyi@v0.0.0-20240530210107-70cf978996f1/api/query/cache/index/index.go (about)

     1  // Copyright 2018 The WPT Dashboard Project. All rights reserved.
     2  // Use of this source code is governed by a BSD-style license that can be
     3  // found in the LICENSE file.
     4  
     5  // nolint:godox // TODO(Hexcles): Extract type RunID to another package (shared) so that Index
     6  // can be mocked into a different package without cyclic imports.
     7  
     8  package index
     9  
    10  import (
    11  	"context"
    12  	"encoding/json"
    13  	"errors"
    14  	"fmt"
    15  	"io"
    16  	"math"
    17  	"net/http"
    18  	"sync"
    19  
    20  	mapset "github.com/deckarep/golang-set"
    21  	"github.com/web-platform-tests/wpt.fyi/api/query"
    22  	"github.com/web-platform-tests/wpt.fyi/api/query/cache/lru"
    23  	"github.com/web-platform-tests/wpt.fyi/shared"
    24  	"github.com/web-platform-tests/wpt.fyi/shared/metrics"
    25  
    26  	"github.com/sirupsen/logrus"
    27  )
    28  
    // Sentinel errors used by the index implementation in this file.
    29  var (
    30  	errNilRun             = errors.New("Test run is nil") // returned by syncClearInProgress when the run is not marked in flight
    31  	errNoQuery            = errors.New("No query provided") // returned by Bind when the query is nil
    32  	errNoRuns             = errors.New("No runs") // returned by Bind for an empty run list and by syncEvictRuns when nothing can be evicted
    33  	errRunExists          = errors.New("Run already exists in index") // returned by syncMarkInProgress; exposed via ErrRunExists
    34  	errRunLoading         = errors.New("Run currently being loaded into index") // returned by syncMarkInProgress; exposed via ErrRunLoading
    35  	errSomeShardsRequired = errors.New("Index must have at least one shard") // returned by NewShardedWPTIndex when numShards <= 0
    36  	errZeroRun            = errors.New("Cannot ingest run with ID of 0") // returned by IngestRun when the run ID is 0
    37  	errEmptyReport        = errors.New("Report contains no results") // returned by HTTPReportLoader.Load together with the report; tolerated by IngestRun
    38  )
    39  
    40  // ErrRunExists returns the sentinel error reported when an operation that
    41  // expects a run to be unknown to an Index finds that the Index already holds
    42  // it. In this file it is produced when ingesting a run that is already stored.
    43  func ErrRunExists() error {
    44  	return errRunExists
    45  }
    46  
    47  // ErrRunLoading returns the sentinel error reported when an operation that
    48  // expects a run to be unknown to an Index finds that the Index is already
    49  // loading it. In this file it is produced when a run is ingested twice at once.
    50  func ErrRunLoading() error {
    51  	return errRunLoading
    52  }
    53  
    54  // Index is an index of test run results that can ingest and evict runs.
    55  type Index interface {
    56  	query.Binder
    57  
    58  	// Run loads the metadata associated with the given RunID value. It returns
    59  	// an error if the Index does not understand the given RunID value.
    60  	Run(RunID) (shared.TestRun, error)
    61  	// Runs loads the metadata associated with the given RunID values. It returns
    62  	// an error if the Index does not understand one or more of the given RunID
    63  	// values.
    64  	Runs([]RunID) ([]shared.TestRun, error)
    65  	// IngestRun loads the test run results associated with the input test run
    66  	// into the index.
    67  	IngestRun(shared.TestRun) error
    68  	// EvictRuns reduces memory pressure by evicting the cache's choice of runs.
    69  	// The float64 is the share of runs to evict; shardedWPTIndex clamps it to [0, 1].
    70  	EvictRuns(float64) (int, error)
    71  	// SetIngestChan sets the channel that synchronizes before ingesting a run.
    72  	// This channel is used by index monitors to ensure that the monitor is
    73  	// scheduled to run frequently enough to keep pace with any influx of ingested
    74  	// runs.
    75  	SetIngestChan(chan bool)
    76  }
    77  
    78  // ProxyIndex forwards Index method calls to a wrapped delegate. It is meant to
    79  // be embedded by types that override some methods and defer to the delegate for
    80  // the rest. This file defines Run, Runs, IngestRun, EvictRuns and SetIngestChan on it.
    81  type ProxyIndex struct {
    82  	delegate Index // target of every forwarded call; set by NewProxyIndex
    83  }
    84  
    85  // Run returns the metadata for the run identified by id by forwarding the call
    86  // to the proxy's delegate and returning the delegate's result unchanged.
    87  func (i *ProxyIndex) Run(id RunID) (shared.TestRun, error) {
    88  	return i.delegate.Run(id)
    89  }
    90  
    91  // Runs returns the metadata for the runs identified by ids by forwarding the
    92  // call to the proxy's delegate and returning the delegate's result unchanged.
    93  func (i *ProxyIndex) Runs(ids []RunID) ([]shared.TestRun, error) {
    94  	return i.delegate.Runs(ids)
    95  }
    96  
    97  // IngestRun loads the results of run r into the index by forwarding the call
    98  // to the proxy's delegate; the delegate's error, if any, is returned as is.
    99  func (i *ProxyIndex) IngestRun(r shared.TestRun) error {
   100  	return i.delegate.IngestRun(r)
   101  }
   102  
   103  // EvictRuns asks the proxy's delegate to evict runs, passing percent through
   104  // unchanged, and returns the delegate's result.
   105  func (i *ProxyIndex) EvictRuns(percent float64) (int, error) {
   106  	return i.delegate.EvictRuns(percent)
   107  }
   108  
   109  // SetIngestChan hands c to the proxy's delegate as the channel that
   110  // synchronizes before a run is ingested.
   111  func (i *ProxyIndex) SetIngestChan(c chan bool) {
   112  	i.delegate.SetIngestChan(c)
   113  }
   114  
   115  // NewProxyIndex instantiates a new proxy index bound to the given delegate.
   116  func NewProxyIndex(idx Index) ProxyIndex {
   117  	return ProxyIndex{idx}
   118  }
   119  
   120  // ReportLoader loads the WPT test results report described by the metadata in
   121  // a shared.TestRun. IngestRun in this file calls Load once per ingested run.
   122  type ReportLoader interface {
   123  	Load(shared.TestRun) (*metrics.TestResultsReport, error)
   124  }
   125  
   126  // shardedWPTIndex is an Index that manages test and result data across mutually
   127  // exclusive shards.
   128  type shardedWPTIndex struct {
   129  	runs     map[RunID]shared.TestRun // metadata of stored runs, keyed by run ID
   130  	lru      lru.LRU // records run accesses and picks the runs to evict
   131  	inFlight mapset.Set // RunIDs currently being ingested
   132  	loader   ReportLoader // used by IngestRun to load each run's report
   133  	shards   []*wptIndex // per-shard test and result data
   134  	m        *sync.RWMutex // held by the sync* methods while they touch runs, inFlight and shards
   135  	c        chan bool // optional ingest channel; IngestRun sends true on it when non-nil
   136  }
   137  
   138  // wptIndex is an index of tests and results. Multicore machines should use
   139  // shardedWPTIndex, which embeds a slice of wptIndex containing mutually
   140  // exclusive subsets of test and result data.
   141  type wptIndex struct {
   142  	tests   Tests // test names registered while storing runs on this shard
   143  	results Results // per-run results; runs are added, deleted and looked up by RunID
   144  	m       *sync.RWMutex // held by the shard-level sync* helpers; also handed to index values built by syncMakeIndex
   145  }
   146  
   147  // testData is a wrapper for a single unit of test+result data from a test run.
   148  type testData struct {
   149  	testName // test name plus optional subtest name (subName is nil for a top-level test)
   150  	ResultID // result status; IngestRun derives it from the report's status string
   151  }
   152  
   153  // HTTPReportLoader is a stateless ReportLoader whose Load method fetches a
   154  // run's report with an HTTP GET of the run's RawResultsURL.
   155  type HTTPReportLoader struct{}
   156  
   // Run returns the stored metadata for the run identified by id, or an error
   // when the run is not present in the index. It delegates to syncGetRun, which
   // holds the index read lock for the lookup.
   157  func (i *shardedWPTIndex) Run(id RunID) (shared.TestRun, error) {
   158  	return i.syncGetRun(id)
   159  }
   160  
   // Runs returns the stored metadata for every run in ids, in the same order,
   // or an error as soon as one ID is not present in the index. It delegates to
   // syncGetRuns, which holds the index read lock for the lookups.
   161  func (i *shardedWPTIndex) Runs(ids []RunID) ([]shared.TestRun, error) {
   162  	return i.syncGetRuns(ids)
   163  }
   164  
   165  func (i *shardedWPTIndex) IngestRun(r shared.TestRun) error {
   166  	// Error cases: ID cannot be 0, run cannot be loaded or loading-in-progress.
   167  	if r.ID == 0 {
   168  		return errZeroRun
   169  	}
   170  
   171  	// Synchronize with anything that may be monitoring run ingestion. Do this
   172  	// before any i.sync* routines to avoid deadlock.
   173  	if i.c != nil {
   174  		i.c <- true
   175  	}
   176  
   177  	if err := i.syncMarkInProgress(r); err != nil {
   178  		return err
   179  	}
   180  	defer func() {
   181  		if err := i.syncClearInProgress(r); err != nil {
   182  			logrus.Warningf("Sync clear error: %s", err.Error())
   183  		}
   184  	}()
   185  
   186  	// Delegate loader to construct complete run report.
   187  	report, err := i.loader.Load(r)
   188  	if err != nil && !errors.Is(err, errEmptyReport) {
   189  		return err
   190  	}
   191  
   192  	// Results of different tests will be stored in different shards, based on the
   193  	// top-level test (i.e., not subtests) integral ID of each test in the report.
   194  	//
   195  	// Create RunResults for each shard's partition of this run's results.
   196  	numShards := len(i.shards)
   197  	numShardsU64 := uint64(numShards)
   198  	shardData := make([]map[TestID]testData, numShards)
   199  	for j := 0; j < numShards; j++ {
   200  		shardData[j] = make(map[TestID]testData)
   201  	}
   202  
   203  	for _, res := range report.Results {
   204  		// Add top-level test (i.e., not subtest) result to appropriate shard.
   205  		t, err := computeTestID(res.Test, nil)
   206  		if err != nil {
   207  			return err
   208  		}
   209  
   210  		shardIdx := int(t.testID % numShardsU64)
   211  		dataForShard := shardData[shardIdx]
   212  		re := ResultID(shared.TestStatusValueFromString(res.Status))
   213  		dataForShard[t] = testData{
   214  			testName: testName{
   215  				name:    res.Test,
   216  				subName: nil,
   217  			},
   218  			ResultID: re,
   219  		}
   220  
   221  		// Dedup subtests, warning when subtest names are duplicated.
   222  		subs := make(map[string]metrics.SubTest)
   223  		for _, sub := range res.Subtests {
   224  			if _, ok := subs[sub.Name]; ok {
   225  				logrus.Warningf("Duplicate subtests with the same name: %s %s", res.Test, sub.Name)
   226  
   227  				continue
   228  			}
   229  			subs[sub.Name] = sub
   230  		}
   231  
   232  		// Add each subtests' result to the appropriate shard (same shard as
   233  		// top-level test).
   234  		for i := range subs {
   235  			name := subs[i].Name
   236  			t, err := computeTestID(res.Test, &name)
   237  			if err != nil {
   238  				return err
   239  			}
   240  
   241  			re := ResultID(shared.TestStatusValueFromString(subs[i].Status))
   242  			dataForShard[t] = testData{
   243  				testName: testName{
   244  					name:    res.Test,
   245  					subName: &name,
   246  				},
   247  				ResultID: re,
   248  			}
   249  		}
   250  	}
   251  
   252  	if err := i.syncStoreRun(r, shardData); err != nil {
   253  		logrus.Warningf("Sync store run error: %s", err.Error())
   254  	}
   255  
   256  	return nil
   257  }
   258  
   259  func (i *shardedWPTIndex) EvictRuns(percent float64) (int, error) {
   260  	return i.syncEvictRuns(math.Max(0.0, math.Min(1.0, percent)))
   261  }
   262  
   263  // nolint:ireturn // TODO: Fix ireturn lint error
   264  func (i *shardedWPTIndex) Bind(runs []shared.TestRun, q query.ConcreteQuery) (query.Plan, error) {
   265  	if len(runs) == 0 {
   266  		return nil, errNoRuns
   267  	} else if q == nil {
   268  		return nil, errNoQuery
   269  	}
   270  
   271  	ids := make([]RunID, len(runs))
   272  	for j, run := range runs {
   273  		ids[j] = RunID(run.ID)
   274  	}
   275  	idxs, err := i.syncExtractRuns(ids)
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  
   280  	fs := make(ShardedFilter, len(idxs))
   281  	for j, idx := range idxs {
   282  		f, err := newFilter(idx, q)
   283  		if err != nil {
   284  			return nil, err
   285  		}
   286  		fs[j] = f
   287  	}
   288  
   289  	return fs, nil
   290  }
   291  
   // SetIngestChan stores c as the channel IngestRun sends on before ingesting a
   // run. The field is assigned without taking the index lock.
   292  func (i *shardedWPTIndex) SetIngestChan(c chan bool) {
   293  	i.c = c
   294  }
   295  
   296  // Load for HTTPReportLoader loads WPT test run reports from the URL specified
   297  // in test run metadata.
   298  func (l HTTPReportLoader) Load(run shared.TestRun) (*metrics.TestResultsReport, error) {
   299  	// Attempt to fetch-and-unmarshal run from run.RawResultsURL.
   300  	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, run.RawResultsURL, nil)
   301  	if err != nil {
   302  		return nil, fmt.Errorf("failed to create GET request for Results URL: %w", err)
   303  	}
   304  	resp, err := http.DefaultClient.Do(req)
   305  	if err != nil {
   306  		return nil, err
   307  	}
   308  	defer resp.Body.Close()
   309  	if resp.StatusCode != http.StatusOK {
   310  		err = fmt.Errorf(`Non-OK HTTP status code of %d from "%s" for run ID=%d`, resp.StatusCode, run.RawResultsURL, run.ID)
   311  
   312  		return nil, err
   313  	}
   314  	data, err := io.ReadAll(resp.Body)
   315  	if err != nil {
   316  		return nil, err
   317  	}
   318  	var report metrics.TestResultsReport
   319  	err = json.Unmarshal(data, &report)
   320  	if err != nil {
   321  		return nil, err
   322  	}
   323  	if len(report.Results) == 0 {
   324  		return &report, errEmptyReport
   325  	}
   326  
   327  	return &report, nil
   328  }
   329  
   330  // NewShardedWPTIndex creates a new empty Index for WPT test run results.
   331  // nolint:ireturn // TODO: Fix ireturn lint error
   332  func NewShardedWPTIndex(loader ReportLoader, numShards int) (Index, error) {
   333  	if numShards <= 0 {
   334  		return nil, errSomeShardsRequired
   335  	}
   336  
   337  	shards := make([]*wptIndex, 0, numShards)
   338  	for i := 0; i < numShards; i++ {
   339  		tests := NewTests()
   340  		shards = append(shards, newWPTIndex(tests))
   341  	}
   342  
   343  	// nolint:exhaustruct // TODO: Fix exhaustruct lint error.
   344  	return &shardedWPTIndex{
   345  		runs:     make(map[RunID]shared.TestRun),
   346  		lru:      lru.NewLRU(),
   347  		inFlight: mapset.NewSet(),
   348  		loader:   loader,
   349  		shards:   shards,
   350  		m:        &sync.RWMutex{},
   351  	}, nil
   352  }
   353  
   354  // NewReportLoader returns an HTTPReportLoader, whose Load method fetches a
   355  // run's report over HTTP from its shared.TestRun.RawResultsURL.
   356  // nolint:ireturn // TODO: Fix ireturn lint error
   357  func NewReportLoader() ReportLoader {
   358  	return HTTPReportLoader{}
   359  }
   360  
   361  func (i *shardedWPTIndex) syncGetRun(id RunID) (shared.TestRun, error) {
   362  	i.m.RLock()
   363  	defer i.m.RUnlock()
   364  
   365  	run, loaded := i.runs[id]
   366  	if !loaded {
   367  		return shared.TestRun{}, fmt.Errorf("Unknown run ID: %v", id)
   368  	}
   369  
   370  	return run, nil
   371  }
   372  
   373  func (i *shardedWPTIndex) syncGetRuns(ids []RunID) ([]shared.TestRun, error) {
   374  	i.m.RLock()
   375  	defer i.m.RUnlock()
   376  
   377  	runs := make([]shared.TestRun, len(ids))
   378  	for j := range ids {
   379  		run, ok := i.runs[ids[j]]
   380  		if !ok {
   381  			return nil, fmt.Errorf("Unknown run ID: %v", ids[j])
   382  		}
   383  
   384  		runs[j] = run
   385  	}
   386  
   387  	return runs, nil
   388  }
   389  
   390  func (i *shardedWPTIndex) syncMarkInProgress(run shared.TestRun) error {
   391  	i.m.Lock()
   392  	defer i.m.Unlock()
   393  
   394  	id := RunID(run.ID)
   395  	_, loaded := i.runs[id]
   396  	if loaded {
   397  		return errRunExists
   398  	}
   399  	if i.inFlight.Contains(id) {
   400  		return errRunLoading
   401  	}
   402  
   403  	i.inFlight.Add(id)
   404  
   405  	return nil
   406  }
   407  
   408  func (i *shardedWPTIndex) syncClearInProgress(run shared.TestRun) error {
   409  	i.m.Lock()
   410  	defer i.m.Unlock()
   411  
   412  	id := RunID(run.ID)
   413  	if !i.inFlight.Contains(id) {
   414  		return errNilRun
   415  	}
   416  
   417  	i.inFlight.Remove(id)
   418  
   419  	return nil
   420  }
   421  
   422  func (i *shardedWPTIndex) syncStoreRun(run shared.TestRun, data []map[TestID]testData) error {
   423  	i.m.Lock()
   424  	defer i.m.Unlock()
   425  
   426  	id := RunID(run.ID)
   427  	for j, shardData := range data {
   428  		if err := syncStoreRunOnShard(i.shards[j], id, shardData); err != nil {
   429  			return err
   430  		}
   431  	}
   432  	i.runs[id] = run
   433  	i.lru.Access(int64(id))
   434  
   435  	return nil
   436  }
   437  
   438  func syncStoreRunOnShard(shard *wptIndex, id RunID, shardData map[TestID]testData) error {
   439  	shard.m.Lock()
   440  	defer shard.m.Unlock()
   441  
   442  	runResults := NewRunResults()
   443  	for t, data := range shardData {
   444  		shard.tests.Add(t, data.testName.name, data.testName.subName)
   445  		runResults.Add(data.ResultID, t)
   446  	}
   447  
   448  	return shard.results.Add(id, runResults)
   449  }
   450  
   451  func (i *shardedWPTIndex) syncEvictRuns(percent float64) (int, error) {
   452  	i.m.Lock()
   453  	defer i.m.Unlock()
   454  
   455  	if len(i.runs) == 0 {
   456  		return 0, errNoRuns
   457  	}
   458  
   459  	runIDs := i.lru.EvictLRU(percent)
   460  	if len(runIDs) == 0 {
   461  		return 0, errNoRuns
   462  	}
   463  
   464  	for _, runID := range runIDs {
   465  		id := RunID(runID)
   466  
   467  		// Delete data from shards, and from runs collection.
   468  		for _, shard := range i.shards {
   469  			if err := syncDeleteResultsFromShard(shard, id); err != nil {
   470  				return 0, err
   471  			}
   472  		}
   473  		delete(i.runs, id)
   474  	}
   475  
   476  	return len(runIDs), nil
   477  }
   478  
   // syncDeleteResultsFromShard deletes the results stored for run id from the
   // shard's results while holding the shard write lock, and returns the error
   // reported by that delete. Entries in the shard's tests are not touched.
   479  func syncDeleteResultsFromShard(shard *wptIndex, id RunID) error {
   480  	shard.m.Lock()
   481  	defer shard.m.Unlock()
   482  
   483  	return shard.results.Delete(id)
   484  }
   485  
   486  func (i *shardedWPTIndex) syncExtractRuns(ids []RunID) ([]index, error) {
   487  	i.m.RLock()
   488  	defer i.m.RUnlock()
   489  
   490  	idxs := make([]index, len(i.shards))
   491  	var err error
   492  	for j, shard := range i.shards {
   493  		idxs[j], err = syncMakeIndex(shard, ids)
   494  		if err != nil {
   495  			return nil, err
   496  		}
   497  	}
   498  
   499  	for _, id := range ids {
   500  		i.lru.Access(int64(id))
   501  	}
   502  
   503  	return idxs, nil
   504  }
   505  
   506  func syncMakeIndex(shard *wptIndex, ids []RunID) (index, error) {
   507  	shard.m.RLock()
   508  	defer shard.m.RUnlock()
   509  
   510  	tests := shard.tests
   511  	runResults := make(map[RunID]RunResults)
   512  	for _, id := range ids {
   513  		rrs := shard.results.ForRun(id)
   514  		if rrs == nil {
   515  			return index{}, fmt.Errorf("Run is unknown to shard: RunID=%v", id)
   516  		}
   517  		runResults[id] = shard.results.ForRun(id)
   518  	}
   519  
   520  	return index{
   521  		tests:      tests,
   522  		runResults: runResults,
   523  		m:          shard.m,
   524  	}, nil
   525  }
   526  
   527  func newWPTIndex(tests Tests) *wptIndex {
   528  	return &wptIndex{
   529  		tests:   tests,
   530  		results: NewResults(),
   531  		m:       &sync.RWMutex{},
   532  	}
   533  }