github.com/web-platform-tests/wpt.fyi@v0.0.0-20240530210107-70cf978996f1/api/query/cache/backfill/backfill.go (about)

     1  // Copyright 2018 The WPT Dashboard Project. All rights reserved.
     2  // Use of this source code is governed by a BSD-style license that can be
     3  // found in the LICENSE file.
     4  
     5  package backfill
     6  
     7  import (
     8  	"context"
     9  	"errors"
    10  	"time"
    11  
    12  	"cloud.google.com/go/datastore"
    13  	"github.com/sirupsen/logrus"
    14  	"google.golang.org/api/option"
    15  
    16  	"github.com/web-platform-tests/wpt.fyi/api/query"
    17  	"github.com/web-platform-tests/wpt.fyi/api/query/cache/index"
    18  	"github.com/web-platform-tests/wpt.fyi/api/query/cache/monitor"
    19  	"github.com/web-platform-tests/wpt.fyi/shared"
    20  )
    21  
    22  type backfillIndex struct {
    23  	index.ProxyIndex
    24  
    25  	backfilling bool
    26  }
    27  
    28  type backfillMonitor struct {
    29  	monitor.ProxyMonitor
    30  
    31  	idx *backfillIndex
    32  }
    33  
    34  // bytesPerRun is a slight over estimate of the memory requirements for one WPT
    35  // run's indexed data. This value was determined experimentally in the early
    36  // phases of search cache development.
    37  const bytesPerRun = uint64(6.5e+7)
    38  
    39  var errNilIndex = errors.New("Index to backfill is nil")
    40  
    41  // GetDatastore constructs a shared.Datastore interface that loads runs from Datastore
    42  // in reverse cronological order, by shared.TestRun.TimeStart.
    43  // nolint:ireturn // TODO: Fix ireturn lint error
    44  func GetDatastore(projectID string, gcpCredentialsFile *string, _ shared.Logger) (shared.Datastore, error) {
    45  	ctx := context.WithValue(context.Background(), shared.DefaultLoggerCtxKey(), logrus.StandardLogger())
    46  	var client *datastore.Client
    47  	var err error
    48  	if gcpCredentialsFile != nil && *gcpCredentialsFile != "" {
    49  		client, err = datastore.NewClient(ctx, projectID, option.WithCredentialsFile(*gcpCredentialsFile))
    50  	} else {
    51  		client, err = datastore.NewClient(ctx, projectID)
    52  	}
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  
    57  	return shared.NewCloudDatastore(ctx, client), nil
    58  }
    59  
    60  func (i *backfillIndex) EvictRuns(percent float64) (int, error) {
    61  	i.backfilling = false
    62  
    63  	return i.ProxyIndex.EvictRuns(percent)
    64  }
    65  
    66  func (m *backfillMonitor) Stop() error {
    67  	m.idx.backfilling = false
    68  
    69  	return m.ProxyMonitor.Stop()
    70  }
    71  
    72  // nolint:all // TODO: Fix ireturn lint error
    73  func (*backfillIndex) Bind([]shared.TestRun, query.ConcreteQuery) (query.Plan, error) {
    74  	return nil, nil
    75  }
    76  
    77  // FillIndex starts backfilling an index given a series of configuration
    78  // parameters for run fetching and index monitoring. The backfilling process
    79  // will halt either:
    80  // The first time a run is evicted from the index.Index via EvictAnyRun(), OR
    81  // the first time the returned monitor.Monitor is stopped via Stop().
    82  // nolint:ireturn // TODO: Fix ireturn lint error
    83  func FillIndex(
    84  	store shared.Datastore,
    85  	logger shared.Logger,
    86  	rt monitor.Runtime,
    87  	interval time.Duration,
    88  	maxIngestedRuns uint,
    89  	maxBytes uint64,
    90  	evictionPercent float64,
    91  	idx index.Index,
    92  ) (monitor.Monitor, error) {
    93  	if idx == nil {
    94  		return nil, errNilIndex
    95  	}
    96  
    97  	bfIdx := &backfillIndex{
    98  		ProxyIndex:  index.NewProxyIndex(idx),
    99  		backfilling: true,
   100  	}
   101  	idxMon, err := monitor.NewIndexMonitor(logger, rt, interval, maxIngestedRuns, maxBytes, evictionPercent, bfIdx)
   102  	if err != nil {
   103  		return nil, err
   104  	}
   105  	bfMon := &backfillMonitor{
   106  		ProxyMonitor: monitor.NewProxyMonitor(idxMon),
   107  		idx:          bfIdx,
   108  	}
   109  
   110  	err = startBackfillMonitor(store, logger, maxBytes, bfMon)
   111  	if err != nil {
   112  		return nil, err
   113  	}
   114  
   115  	return bfMon, nil
   116  }
   117  
   118  func startBackfillMonitor(store shared.Datastore, logger shared.Logger, maxBytes uint64, m *backfillMonitor) error {
   119  	// FetchRuns will return at most N runs for each product, so divide the upper bound by the number of products.
   120  	limit := int(maxBytes/bytesPerRun) / len(shared.GetDefaultProducts())
   121  	runsByProduct, err := store.TestRunQuery().LoadTestRuns(shared.GetDefaultProducts(), nil, nil, nil, nil, &limit, nil)
   122  	if err != nil {
   123  		return err
   124  	}
   125  	if len(runsByProduct.AllRuns()) < 1 {
   126  		return nil
   127  	}
   128  
   129  	// Start the monitor to ensure that memory pressure is tracked.
   130  	go func() {
   131  		err := m.Start()
   132  		if err != nil {
   133  			logger.Warningf("Failed to start back fill: %s", err.Error())
   134  		}
   135  	}()
   136  
   137  	// Backfill index until its backfilling parameter is set to false, or
   138  	// collection of test runs is exhausted.
   139  	go func() {
   140  		most := 0
   141  		for _, productRuns := range runsByProduct {
   142  			if most < len(productRuns.TestRuns) {
   143  				most = len(productRuns.TestRuns)
   144  			}
   145  		}
   146  		for i := 0; i < most && m.idx.backfilling; i++ {
   147  			for _, productRuns := range runsByProduct {
   148  				if !m.idx.backfilling {
   149  					logger.Warningf("Backfilling halted mid-iteration")
   150  
   151  					break
   152  				} else if i >= len(productRuns.TestRuns) {
   153  					continue
   154  				}
   155  				run := productRuns.TestRuns[i]
   156  				logger.Infof("Backfilling index with run %v", run)
   157  				err = m.idx.IngestRun(run)
   158  				if err != nil {
   159  					logger.Errorf("Failed to ingest run during backfill: %v: %v", run, err)
   160  				} else {
   161  					logger.Infof("Backfilled index with run %v", run)
   162  				}
   163  			}
   164  			logger.Infof("Backfilling complete")
   165  		}
   166  	}()
   167  
   168  	return nil
   169  }