github.com/web-platform-tests/wpt.fyi@v0.0.0-20240530210107-70cf978996f1/api/query/search.go (about)

     1  // Copyright 2018 The WPT Dashboard Project. All rights reserved.
     2  // Use of this source code is governed by a BSD-style license that can be
     3  // found in the LICENSE file.
     4  
     5  package query
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"encoding/json"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"net/http"
    15  	"net/url"
    16  	"sort"
    17  	"strconv"
    18  	"strings"
    19  	time "time"
    20  
    21  	"github.com/web-platform-tests/wpt.fyi/shared"
    22  )
    23  
    24  type byName []shared.SearchResult
    25  
    26  func (r byName) Len() int           { return len(r) }
    27  func (r byName) Swap(i, j int)      { r[i], r[j] = r[j], r[i] }
    28  func (r byName) Less(i, j int) bool { return r[i].Test < r[j].Test }
    29  
    30  type searchHandler struct {
    31  	api shared.AppEngineAPI
    32  }
    33  
    34  type unstructuredSearchHandler struct {
    35  	queryHandler
    36  }
    37  
    38  type structuredSearchHandler struct {
    39  	queryHandler
    40  
    41  	api shared.AppEngineAPI
    42  }
    43  
    44  func apiSearchHandler(w http.ResponseWriter, r *http.Request) {
    45  	api := shared.NewAppEngineAPI(r.Context())
    46  	searchHandler{api}.ServeHTTP(w, r)
    47  }
    48  
    49  func (sh searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    50  	if r.Method != http.MethodGet && r.Method != http.MethodPost {
    51  		http.Error(w, "Invalid HTTP method", http.StatusBadRequest)
    52  
    53  		return
    54  	}
    55  
    56  	ctx := sh.api.Context()
    57  	mc := shared.NewGZReadWritable(shared.NewRedisReadWritable(ctx, 48*time.Hour))
    58  	qh := queryHandler{ // nolint:exhaustruct // TODO: Fix exhaustruct lint error
    59  		store:      shared.NewAppEngineDatastore(ctx, true),
    60  		dataSource: shared.NewByteCachedStore(ctx, mc, shared.NewHTTPReadable(ctx)),
    61  	}
    62  	var delegate http.Handler
    63  	if r.Method == http.MethodGet {
    64  		delegate = unstructuredSearchHandler{queryHandler: qh}
    65  	} else {
    66  		delegate = structuredSearchHandler{queryHandler: qh, api: sh.api}
    67  	}
    68  	ch := shared.NewCachingHandler(ctx, delegate, mc, isRequestCacheable, cacheKey, shouldCacheSearchResponse)
    69  	ch.ServeHTTP(w, r)
    70  }
    71  
    72  func (sh structuredSearchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    73  	data, err := io.ReadAll(r.Body)
    74  	if err != nil {
    75  		http.Error(w, "Failed to read request body", http.StatusInternalServerError)
    76  	}
    77  	err = r.Body.Close()
    78  	if err != nil {
    79  		http.Error(w, "Failed to finish reading request body", http.StatusInternalServerError)
    80  	}
    81  
    82  	var rq RunQuery
    83  	err = json.Unmarshal(data, &rq)
    84  	if err != nil {
    85  		http.Error(w, err.Error(), http.StatusBadRequest)
    86  
    87  		return
    88  	}
    89  
    90  	// Prepare logging.
    91  	ctx := sh.api.Context()
    92  	logger := shared.GetLogger(ctx)
    93  
    94  	var simpleQ TestNamePattern
    95  
    96  	r2 := r.Clone(r.Context())
    97  	r2url := *r.URL
    98  	r2.URL = &r2url
    99  	r2.Method = http.MethodGet
   100  	q := r.URL.Query()
   101  	q.Add("q", simpleQ.Pattern)
   102  	// Assemble list of run IDs for later use.
   103  	runIDStrs := make([]string, 0, len(rq.RunIDs))
   104  	for _, id := range rq.RunIDs {
   105  		runID := strconv.FormatInt(id, 10)
   106  		q.Add("run_id", runID)
   107  		runIDStrs = append(runIDStrs, strconv.FormatInt(id, 10))
   108  	}
   109  	runIDsStr := strings.Join(runIDStrs, ",")
   110  	r2.URL.RawQuery = q.Encode()
   111  
   112  	// Check if the query is a simple (empty/just True, or test name only) query
   113  	var isSimpleQ bool
   114  	{
   115  		if _, isTrueQ := rq.AbstractQuery.(True); isTrueQ {
   116  			isSimpleQ = true
   117  		} else if exists, isExists := rq.AbstractQuery.(AbstractExists); isExists && len(exists.Args) == 1 {
   118  			simpleQ, isSimpleQ = exists.Args[0].(TestNamePattern)
   119  		}
   120  		for _, param := range []string{"interop", "subtests", "diff"} {
   121  			val, _ := shared.ParseBooleanParam(q, param)
   122  			isSimpleQ = isSimpleQ && (val == nil || !*val)
   123  		}
   124  
   125  		// Check old summary files. If any can't be found,
   126  		// use the searchcache to aggregate the runs.
   127  		summaryErr := sh.validateSummaryVersions(r2.URL.Query(), logger)
   128  		if summaryErr != nil {
   129  			isSimpleQ = false
   130  			if errors.Is(summaryErr, ErrBadSummaryVersion) {
   131  				logger.Debugf("%s yields unsupported summary version. %s", r2.URL.Query().Encode(), summaryErr.Error())
   132  			} else {
   133  				logger.Debugf("Error checking summary file names: %v", summaryErr)
   134  			}
   135  		}
   136  	}
   137  
   138  	// Use searchcache for a complex query or if old summary files exist.
   139  	if !isSimpleQ {
   140  		resp, err := sh.useSearchcache(w, r, data, logger)
   141  		if err != nil {
   142  			http.Error(w, "Error connecting to search API cache", http.StatusInternalServerError)
   143  		} else {
   144  			defer resp.Body.Close()
   145  			w.WriteHeader(resp.StatusCode)
   146  			_, err = io.Copy(w, resp.Body)
   147  			if err != nil {
   148  				logger.Errorf("Error forwarding response payload from search cache: %v", err)
   149  			}
   150  		}
   151  
   152  		return
   153  	}
   154  
   155  	q = r.URL.Query()
   156  	q.Set("q", simpleQ.Pattern)
   157  	q.Set("run_ids", runIDsStr)
   158  	r2.URL.RawQuery = q.Encode()
   159  	// Structured query is equivalent to unstructured query.
   160  	//delegate to unstructured query handler.
   161  	unstructuredSearchHandler{queryHandler: sh.queryHandler}.ServeHTTP(w, r2)
   162  }
   163  
   164  func (sh structuredSearchHandler) useSearchcache(_ http.ResponseWriter, r *http.Request,
   165  	data []byte, logger shared.Logger) (*http.Response, error) {
   166  	hostname := sh.api.GetServiceHostname("searchcache")
   167  	// nolint:godox // TODO(Issue #2941): This will not work when hostname is localhost (http scheme needed).
   168  	fwdURL, err := url.Parse(fmt.Sprintf("https://%s/api/search/cache", hostname))
   169  	if err != nil {
   170  		logger.Debugf("Error parsing hostname.")
   171  	}
   172  	fwdURL.RawQuery = r.URL.RawQuery
   173  
   174  	logger.Infof("Forwarding structured search request to %s: %s", hostname, string(data))
   175  
   176  	client := sh.api.GetHTTPClientWithTimeout(time.Second * 15)
   177  	req, err := http.NewRequestWithContext(r.Context(), http.MethodPost, fwdURL.String(), bytes.NewBuffer(data))
   178  	if err != nil {
   179  		logger.Errorf("Failed to create request to POST %s: %v", fwdURL.String(), err)
   180  
   181  		return nil, err
   182  	}
   183  	req.Header.Add("Content-Type", "application/json")
   184  
   185  	resp, err := client.Do(req)
   186  	if err != nil {
   187  		logger.Errorf("Error connecting to search API cache: %v", err)
   188  
   189  		return nil, err
   190  	}
   191  
   192  	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
   193  		msg := fmt.Sprintf("Error from request: POST %s: STATUS %d", fwdURL.String(), resp.StatusCode)
   194  		errBody, err2 := io.ReadAll(resp.Body)
   195  		if err2 == nil {
   196  			msg = fmt.Sprintf("%s: %s", msg, string(errBody))
   197  			resp.Body = io.NopCloser(bytes.NewBuffer(errBody))
   198  		}
   199  		logger.Errorf(msg)
   200  	}
   201  
   202  	return resp, nil
   203  }
   204  
   205  func (sh unstructuredSearchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
   206  	filters, testRuns, summaries, err := sh.processInput(w, r)
   207  	// processInput handles writing any error to w.
   208  	if err != nil {
   209  		return
   210  	}
   211  
   212  	resp := prepareSearchResponse(filters, testRuns, summaries)
   213  
   214  	data, err := json.Marshal(resp)
   215  	if err != nil {
   216  		http.Error(w, err.Error(), http.StatusInternalServerError)
   217  	}
   218  	_, err = w.Write(data)
   219  	if err != nil {
   220  		http.Error(w, err.Error(), http.StatusInternalServerError)
   221  	}
   222  }
   223  
   224  func prepareSearchResponse(
   225  	filters *shared.QueryFilter,
   226  	testRuns []shared.TestRun,
   227  	summaries []summary,
   228  ) shared.SearchResponse {
   229  	resp := shared.SearchResponse{ // nolint:exhaustruct // TODO: Fix exhaustruct lint error
   230  		Runs: testRuns,
   231  	}
   232  	q := canonicalizeStr(filters.Q)
   233  	// Dedup visited file names via a map of results.
   234  	resMap := make(map[string]shared.SearchResult)
   235  	for i, s := range summaries {
   236  		for filename, testInfo := range s {
   237  			// Exclude filenames that do not match query.
   238  			if !strings.Contains(canonicalizeStr(filename), q) {
   239  				continue
   240  			}
   241  			if _, ok := resMap[filename]; !ok {
   242  				resMap[filename] = shared.SearchResult{ // nolint:exhaustruct // TODO: Fix exhaustruct lint error
   243  					Test:         filename,
   244  					LegacyStatus: make([]shared.LegacySearchRunResult, len(testRuns)),
   245  				}
   246  			}
   247  			resMap[filename].LegacyStatus[i] = shared.LegacySearchRunResult{
   248  				Passes:        testInfo.Counts[0],
   249  				Total:         testInfo.Counts[1],
   250  				Status:        testInfo.Status,
   251  				NewAggProcess: true,
   252  			}
   253  		}
   254  	}
   255  	// Load map into slice and sort it.
   256  	resp.Results = make([]shared.SearchResult, 0, len(resMap))
   257  	for _, r := range resMap {
   258  		resp.Results = append(resp.Results, r)
   259  	}
   260  	sort.Sort(byName(resp.Results))
   261  
   262  	return resp
   263  }
   264  
   265  // nolint:gochecknoglobals // TODO: Fix gochecknoglobals lint error
   266  var cacheKey = func(r *http.Request) interface{} {
   267  	if r.Method == http.MethodGet {
   268  		return shared.URLAsCacheKey(r)
   269  	}
   270  
   271  	body := r.Body
   272  	data, err := io.ReadAll(r.Body)
   273  	if err != nil {
   274  		msg := fmt.Sprintf("Failed to read non-GET request body for generating cache key: %v", err)
   275  		shared.GetLogger(r.Context()).Errorf(msg)
   276  		panic(msg)
   277  	}
   278  	defer body.Close()
   279  
   280  	// Ensure that r.Body can be read again by other request handling routines.
   281  	r.Body = io.NopCloser(bytes.NewBuffer(data))
   282  
   283  	return fmt.Sprintf("%s#%s", r.URL.String(), string(data))
   284  }
   285  
   286  // nolint:godox // TODO: Sometimes an empty result set is being cached for a query over
   287  // legitimate runs. For now, prevent serving empty result sets from cache.
   288  // Eventually, a more durable fix to
   289  // https://github.com/web-platform-tests/wpt.fyi/issues/759 should replace this
   290  // approximation.
   291  
   292  // nolint:gochecknoglobals // TODO: Fix gochecknoglobals lint error
   293  var shouldCacheSearchResponse = func(ctx context.Context, statusCode int, payload []byte) bool {
   294  	if !shared.CacheStatusOK(ctx, statusCode, payload) {
   295  		return false
   296  	}
   297  
   298  	var resp shared.SearchResponse
   299  	err := json.Unmarshal(payload, &resp)
   300  	if err != nil {
   301  		shared.GetLogger(ctx).Errorf("Malformed search response")
   302  
   303  		return false
   304  	}
   305  
   306  	if len(resp.Results) == 0 {
   307  		shared.GetLogger(ctx).Errorf("Query yielded no results; not caching")
   308  
   309  		return false
   310  	}
   311  
   312  	return true
   313  }