github.com/GoogleCloudPlatform/testgrid@v0.0.174/pkg/api/v1/summary.go (about)

     1  /*
     2  Copyright 2023 The TestGrid Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package v1
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"net/http"
    24  	"sort"
    25  
    26  	"github.com/go-chi/chi"
    27  
    28  	"github.com/golang/protobuf/ptypes/timestamp"
    29  	"github.com/sirupsen/logrus"
    30  
    31  	"github.com/GoogleCloudPlatform/testgrid/config"
    32  	apipb "github.com/GoogleCloudPlatform/testgrid/pb/api/v1"
    33  	summarypb "github.com/GoogleCloudPlatform/testgrid/pb/summary"
    34  	"github.com/GoogleCloudPlatform/testgrid/pkg/summarizer"
    35  )
    36  
    37  // TODO(sultan-duisenbay) - consider setting these as API module flags in main.go
    38  const (
    39  	numSummaryFailingTests = 5
    40  	numSummaryFlakyTests   = 5
    41  
    42  	passing    = "PASSING"
    43  	failing    = "FAILING"
    44  	flaky      = "FLAKY"
    45  	stale      = "STALE"
    46  	broken     = "BROKEN"
    47  	pending    = "PENDING"
    48  	acceptable = "ACCEPTABLE"
    49  	unknown    = "UNKNOWN"
    50  )
    51  
var (
	// tabStatusStr maps a stored tab status enum value to the status string
	// used in API responses.
	// These should be in line with the TabStatus enum in state.proto.
	// NOTE(review): the key type is imported from the summary proto package,
	// so the state.proto reference above may be stale - confirm.
	// Looking up an enum value that has no entry here yields the empty string.
	tabStatusStr = map[summarypb.DashboardTabSummary_TabStatus]string{
		summarypb.DashboardTabSummary_PASS:       passing,
		summarypb.DashboardTabSummary_FAIL:       failing,
		summarypb.DashboardTabSummary_FLAKY:      flaky,
		summarypb.DashboardTabSummary_STALE:      stale,
		summarypb.DashboardTabSummary_BROKEN:     broken,
		summarypb.DashboardTabSummary_PENDING:    pending,
		summarypb.DashboardTabSummary_ACCEPTABLE: acceptable,
	}
)
    64  
    65  // convertSummary converts the tab summary from storage format (summary.proto) to wire format (data.proto)
    66  func convertSummary(tabSummary *summarypb.DashboardTabSummary) *apipb.TabSummary {
    67  
    68  	fs := extractFailuresSummary(tabSummary.GetFailingTestSummaries())
    69  	hs := extractHealthinessSummary(tabSummary.GetHealthiness())
    70  	return &apipb.TabSummary{
    71  		DashboardName:         tabSummary.DashboardName,
    72  		TabName:               tabSummary.DashboardTabName,
    73  		OverallStatus:         tabStatusStr[tabSummary.OverallStatus],
    74  		DetailedStatusMessage: tabSummary.Status,
    75  		LastRunTimestamp:      generateTimestamp(tabSummary.LastRunTimestamp),
    76  		LastUpdateTimestamp:   generateTimestamp(tabSummary.LastUpdateTimestamp),
    77  		LatestPassingBuild:    tabSummary.LatestGreen,
    78  		FailuresSummary:       fs,
    79  		HealthinessSummary:    hs,
    80  	}
    81  }
    82  
    83  // generateTimestamp converts the float to a pointer to Timestamp proto struct
    84  func generateTimestamp(ts float64) *timestamp.Timestamp {
    85  	sec, nano := math.Modf(ts)
    86  	return &timestamp.Timestamp{
    87  		Seconds: int64(sec),
    88  		Nanos:   int32(nano * 1e9),
    89  	}
    90  }
    91  
    92  // extractFailuresSummary extracts the most important info from summary proto's FailingTestSummaries field.
    93  // This includes stats as well top failing tests info.
    94  // Top failing tests # is determined by numSummaryFailingTests.
    95  func extractFailuresSummary(failingTests []*summarypb.FailingTestSummary) *apipb.FailuresSummary {
    96  	if len(failingTests) == 0 {
    97  		return nil
    98  	}
    99  
   100  	// fetch top failing tests
   101  	topN := int(math.Min(float64(numSummaryFailingTests), float64(len(failingTests))))
   102  
   103  	sort.SliceStable(failingTests, func(i, j int) bool {
   104  		return failingTests[i].FailCount > failingTests[j].FailCount
   105  	})
   106  
   107  	var topTests []*apipb.FailingTestInfo
   108  	for i := 0; i < topN; i++ {
   109  		test := failingTests[i]
   110  
   111  		fti := &apipb.FailingTestInfo{
   112  			DisplayName:   test.DisplayName,
   113  			FailCount:     test.FailCount,
   114  			PassTimestamp: generateTimestamp(test.PassTimestamp),
   115  			FailTimestamp: generateTimestamp(test.FailTimestamp),
   116  		}
   117  		topTests = append(topTests, fti)
   118  	}
   119  
   120  	return &apipb.FailuresSummary{
   121  		FailureStats: &apipb.FailureStats{
   122  			NumFailingTests: int32(len(failingTests)),
   123  		},
   124  		TopFailingTests: topTests,
   125  	}
   126  }
   127  
   128  // extractHealthinessSummary extracts the most important info from summary proto's Healthiness field.
   129  // This includes stats as well top flaky tests info.
   130  // Top flaky tests # is determined by numSummaryFlakyTests.
   131  func extractHealthinessSummary(healthiness *summarypb.HealthinessInfo) *apipb.HealthinessSummary {
   132  	if healthiness == nil {
   133  		return nil
   134  	}
   135  
   136  	// obtain previous flakiness for the whole tab
   137  	// need to distinguish between zero flakiness and absent flakiness
   138  	var prevFlakiness float32
   139  	if len(healthiness.PreviousFlakiness) == 0 {
   140  		prevFlakiness = -1.0
   141  	} else {
   142  		prevFlakiness = healthiness.PreviousFlakiness[0]
   143  	}
   144  
   145  	sort.SliceStable(healthiness.Tests, func(i, j int) bool {
   146  		return healthiness.Tests[i].Flakiness > healthiness.Tests[j].Flakiness
   147  	})
   148  
   149  	// fetch top flaky tests (with +ve flakiness)
   150  	numFlakyTests := 0
   151  	for i := 0; i < len(healthiness.Tests); i++ {
   152  		t := healthiness.Tests[i]
   153  		if t.Flakiness <= 0 {
   154  			break
   155  		}
   156  		numFlakyTests++
   157  	}
   158  
   159  	topN := int(math.Min(float64(numSummaryFlakyTests), float64(numFlakyTests)))
   160  
   161  	var topTests []*apipb.FlakyTestInfo
   162  	for i := 0; i < topN; i++ {
   163  		test := healthiness.Tests[i]
   164  		fti := &apipb.FlakyTestInfo{
   165  			DisplayName: test.DisplayName,
   166  			Flakiness:   test.Flakiness,
   167  			Change:      test.ChangeFromLastInterval,
   168  		}
   169  		topTests = append(topTests, fti)
   170  	}
   171  
   172  	return &apipb.HealthinessSummary{
   173  		TopFlakyTests: topTests,
   174  		HealthinessStats: &apipb.HealthinessStats{
   175  			Start:             healthiness.Start,
   176  			End:               healthiness.End,
   177  			AverageFlakiness:  healthiness.AverageFlakiness,
   178  			PreviousFlakiness: prevFlakiness,
   179  			NumFlakyTests:     int32(numFlakyTests),
   180  		},
   181  	}
   182  }
   183  
   184  // fetchSummary returns the summary struct as defined in summary.proto.
   185  // input dashboard doesn't have to be normalized.
   186  // Returns an error iff the scope refers to non-existent bucket OR server fails to read the summary.
   187  func (s *Server) fetchSummary(ctx context.Context, scope, dashboard string) (*summarypb.DashboardSummary, error) {
   188  	configPath, _, err := s.configPath(scope)
   189  
   190  	summaryPath, err := summarizer.SummaryPath(*configPath, s.SummaryPathPrefix, dashboard)
   191  	if err != nil {
   192  		return nil, fmt.Errorf("failed to create the summary path: %v", err)
   193  	}
   194  
   195  	summary, _, _, err := summarizer.ReadSummary(ctx, s.Client, *summaryPath)
   196  	if err != nil {
   197  		return nil, fmt.Errorf("failed to download summary at %v: %v", summaryPath.String(), err)
   198  	}
   199  
   200  	return summary, nil
   201  }
   202  
   203  // ListTabSummaries returns the list of tab summaries for the particular dashboard.
   204  // Dashboard name doesn't have to be normalized.
   205  // Returns an error iff dashboard does not exist OR the server can't read summary from GCS bucket.
   206  func (s *Server) ListTabSummaries(ctx context.Context, req *apipb.ListTabSummariesRequest) (*apipb.ListTabSummariesResponse, error) {
   207  	ctx, cancel := context.WithTimeout(ctx, s.Timeout)
   208  	defer cancel()
   209  
   210  	scope := req.GetScope()
   211  	cfg, err := s.getConfig(ctx, logrus.WithContext(ctx), scope)
   212  
   213  	// TODO(sultan-duisenbay): return canonical error codes
   214  	if err != nil {
   215  		return nil, fmt.Errorf("failed to fetch config from {%q}: %v", scope, err)
   216  	}
   217  
   218  	cfg.Mutex.RLock()
   219  	defer cfg.Mutex.RUnlock()
   220  
   221  	dashboardKey := config.Normalize(req.GetDashboard())
   222  	if _, ok := cfg.NormalDashboard[dashboardKey]; !ok {
   223  		return nil, fmt.Errorf("dashboard {%q} not found", dashboardKey)
   224  	}
   225  
   226  	summary, err := s.fetchSummary(ctx, scope, dashboardKey)
   227  	if err != nil {
   228  		return nil, fmt.Errorf("failed to fetch summary for dashboard {%q}: %v", dashboardKey, err)
   229  	}
   230  
   231  	if summary == nil {
   232  		return nil, fmt.Errorf("summary for dashboard {%q} not found.", dashboardKey)
   233  	}
   234  
   235  	var resp apipb.ListTabSummariesResponse
   236  	for _, tabSummary := range summary.TabSummaries {
   237  		ts := convertSummary(tabSummary)
   238  		resp.TabSummaries = append(resp.TabSummaries, ts)
   239  	}
   240  	return &resp, nil
   241  
   242  }
   243  
   244  // ListTabSummariesHTTP returns the list of tab summaries as a json.
   245  // Response json: ListTabSummariesResponse
   246  func (s Server) ListTabSummariesHTTP(w http.ResponseWriter, r *http.Request) {
   247  	req := apipb.ListTabSummariesRequest{
   248  		Scope:     r.URL.Query().Get(scopeParam),
   249  		Dashboard: chi.URLParam(r, "dashboard"),
   250  	}
   251  	resp, err := s.ListTabSummaries(r.Context(), &req)
   252  	if err != nil {
   253  		http.Error(w, err.Error(), http.StatusNotFound)
   254  		return
   255  	}
   256  
   257  	s.writeJSON(w, resp)
   258  }
   259  
   260  // GetTabSummary returns the tab summary for the particular dashboard and tab.
   261  // Dashboard and tab names don't have to be normalized.
   262  // Returns an error iff
   263  // - dashboard or tab does not exist
   264  // - the server can't read summary from GCS bucket
   265  // - tab summary for particular tab doesn't exist
   266  func (s *Server) GetTabSummary(ctx context.Context, req *apipb.GetTabSummaryRequest) (*apipb.GetTabSummaryResponse, error) {
   267  	ctx, cancel := context.WithTimeout(ctx, s.Timeout)
   268  	defer cancel()
   269  
   270  	scope := req.GetScope()
   271  	cfg, err := s.getConfig(ctx, logrus.WithContext(ctx), scope)
   272  
   273  	// TODO(sultan-duisenbay): return canonical error codes
   274  	if err != nil {
   275  		return nil, fmt.Errorf("failed to fetch config from {%q}: %v", scope, err)
   276  	}
   277  
   278  	cfg.Mutex.RLock()
   279  	defer cfg.Mutex.RUnlock()
   280  
   281  	reqDashboardName, reqTabName := req.GetDashboard(), req.GetTab()
   282  
   283  	_, tabName, _, err := findDashboardTab(cfg, reqDashboardName, reqTabName)
   284  	if err != nil {
   285  		return nil, fmt.Errorf("invalid request input {%q, %q}: %v", reqDashboardName, reqTabName, err)
   286  	}
   287  
   288  	summary, err := s.fetchSummary(ctx, scope, reqDashboardName)
   289  	if err != nil {
   290  		return nil, fmt.Errorf("failed to fetch summary for dashboard {%q}: %v", reqDashboardName, err)
   291  	}
   292  
   293  	if summary == nil {
   294  		return nil, fmt.Errorf("summary for dashboard {%q} not found.", reqDashboardName)
   295  	}
   296  
   297  	var resp apipb.GetTabSummaryResponse
   298  	for _, tabSummary := range summary.GetTabSummaries() {
   299  		if tabSummary.DashboardTabName == tabName {
   300  			resp.TabSummary = convertSummary(tabSummary)
   301  			return &resp, nil
   302  		}
   303  	}
   304  
   305  	return nil, fmt.Errorf("failed to find summary for tab {%q}.", tabName)
   306  }
   307  
   308  // GetTabSummaryHTTP returns the tab summary as a json.
   309  // Response json: GetTabSummaryResponse
   310  func (s Server) GetTabSummaryHTTP(w http.ResponseWriter, r *http.Request) {
   311  	req := apipb.GetTabSummaryRequest{
   312  		Scope:     r.URL.Query().Get(scopeParam),
   313  		Dashboard: chi.URLParam(r, "dashboard"),
   314  		Tab:       chi.URLParam(r, "tab"),
   315  	}
   316  	resp, err := s.GetTabSummary(r.Context(), &req)
   317  	if err != nil {
   318  		http.Error(w, err.Error(), http.StatusNotFound)
   319  		return
   320  	}
   321  
   322  	s.writeJSON(w, resp)
   323  }
   324  
   325  // ListDashboardSummaries returns the list of dashboard summaries for the particular dashboard group. Think of it as aggregated view of ListTabSummaries data.
   326  // Dashboard group name doesn't have to be normalized. Returns an error iff
   327  // - dashboard name does not exist in config
   328  // - the server can't read summary from GCS bucket
   329  func (s *Server) ListDashboardSummaries(ctx context.Context, req *apipb.ListDashboardSummariesRequest) (*apipb.ListDashboardSummariesResponse, error) {
   330  	ctx, cancel := context.WithTimeout(ctx, s.Timeout)
   331  	defer cancel()
   332  
   333  	scope := req.GetScope()
   334  	cfg, err := s.getConfig(ctx, logrus.WithContext(ctx), scope)
   335  
   336  	// TODO(sultan-duisenbay): return canonical error codes
   337  	if err != nil {
   338  		return nil, fmt.Errorf("failed to fetch config from {%q}: %v", scope, err)
   339  	}
   340  
   341  	cfg.Mutex.RLock()
   342  	defer cfg.Mutex.RUnlock()
   343  
   344  	dashboardGroupKey := config.Normalize(req.GetDashboardGroup())
   345  	denormalizedName, ok := cfg.NormalDashboardGroup[dashboardGroupKey]
   346  	if !ok {
   347  		return nil, fmt.Errorf("dashboard group {%q} not found", denormalizedName)
   348  	}
   349  
   350  	var resp apipb.ListDashboardSummariesResponse
   351  	for _, dashboardName := range cfg.Config.DashboardGroups[denormalizedName].DashboardNames {
   352  		summary, err := s.fetchSummary(ctx, scope, dashboardName)
   353  		if err != nil {
   354  			return nil, fmt.Errorf("failed to fetch summary for dashboard {%q}: %v", dashboardName, err)
   355  		}
   356  		// skip over non-existing dashboards
   357  		if summary == nil {
   358  			continue
   359  		}
   360  		resp.DashboardSummaries = append(resp.DashboardSummaries, dashboardSummary(summary, dashboardName))
   361  	}
   362  
   363  	return &resp, nil
   364  }
   365  
   366  // ListDashboardSummariesHTTP returns the list of dashboard summaries as a json.
   367  // Response json: ListDashboardSummariesResponse
   368  func (s Server) ListDashboardSummariesHTTP(w http.ResponseWriter, r *http.Request) {
   369  	req := apipb.ListDashboardSummariesRequest{
   370  		Scope:          r.URL.Query().Get(scopeParam),
   371  		DashboardGroup: chi.URLParam(r, "dashboard-group"),
   372  	}
   373  	resp, err := s.ListDashboardSummaries(r.Context(), &req)
   374  	if err != nil {
   375  		http.Error(w, err.Error(), http.StatusNotFound)
   376  		return
   377  	}
   378  
   379  	s.writeJSON(w, resp)
   380  }
   381  
   382  // GetDashboardSummary returns the dashboard summary for the particular dashboard. Think of it as aggregated view of ListTabSummaries data.
   383  // Dashboard name doesn't have to be normalized. Returns an error iff
   384  // - dashboard name does not exist in config
   385  // - the server can't read summary from GCS bucket
   386  // - dashboard summary doesn't exist
   387  func (s *Server) GetDashboardSummary(ctx context.Context, req *apipb.GetDashboardSummaryRequest) (*apipb.GetDashboardSummaryResponse, error) {
   388  	ctx, cancel := context.WithTimeout(ctx, s.Timeout)
   389  	defer cancel()
   390  
   391  	scope := req.GetScope()
   392  	cfg, err := s.getConfig(ctx, logrus.WithContext(ctx), scope)
   393  
   394  	// TODO(sultan-duisenbay): return canonical error codes
   395  	if err != nil {
   396  		return nil, fmt.Errorf("failed to fetch config from {%q}: %v", scope, err)
   397  	}
   398  
   399  	cfg.Mutex.RLock()
   400  	defer cfg.Mutex.RUnlock()
   401  
   402  	dashboardKey := config.Normalize(req.GetDashboard())
   403  	denormalizedName, ok := cfg.NormalDashboard[dashboardKey]
   404  	if !ok {
   405  		return nil, fmt.Errorf("dashboard {%q} not found", dashboardKey)
   406  	}
   407  
   408  	summary, err := s.fetchSummary(ctx, scope, dashboardKey)
   409  	if err != nil {
   410  		return nil, fmt.Errorf("failed to fetch summary for dashboard {%q}: %v", dashboardKey, err)
   411  	}
   412  
   413  	if summary == nil {
   414  		return nil, fmt.Errorf("summary for dashboard {%q} not found", dashboardKey)
   415  	}
   416  
   417  	return &apipb.GetDashboardSummaryResponse{
   418  		DashboardSummary: dashboardSummary(summary, denormalizedName),
   419  	}, nil
   420  
   421  }
   422  
   423  // GetDashboardSummaryHTTP returns the dashboard summary as a json.
   424  // Response json: GetDashboardSummaryResponse
   425  func (s Server) GetDashboardSummaryHTTP(w http.ResponseWriter, r *http.Request) {
   426  	req := apipb.GetDashboardSummaryRequest{
   427  		Scope:     r.URL.Query().Get(scopeParam),
   428  		Dashboard: chi.URLParam(r, "dashboard"),
   429  	}
   430  	resp, err := s.GetDashboardSummary(r.Context(), &req)
   431  	if err != nil {
   432  		http.Error(w, err.Error(), http.StatusNotFound)
   433  		return
   434  	}
   435  
   436  	s.writeJSON(w, resp)
   437  }
   438  
   439  // dashboardSummary generates a dashboard summary in a wire data format defined in api/v1/data.proto
   440  // overall dashboard status is defined by priority/severity within this function.
   441  func dashboardSummary(summary *summarypb.DashboardSummary, dashboardName string) *apipb.DashboardSummary {
   442  
   443  	tabStatusCount := make(map[string]int32)
   444  	for _, tab := range summary.TabSummaries {
   445  		statusStr := tabStatusStr[tab.OverallStatus]
   446  		tabStatusCount[statusStr]++
   447  	}
   448  
   449  	overallStatus := unknown
   450  	switch {
   451  	case tabStatusCount[broken] > 0:
   452  		overallStatus = broken
   453  	case tabStatusCount[stale] > 0:
   454  		overallStatus = stale
   455  	case tabStatusCount[failing] > 0:
   456  		overallStatus = failing
   457  	case tabStatusCount[flaky] > 0:
   458  		overallStatus = flaky
   459  	case tabStatusCount[pending] > 0:
   460  		overallStatus = pending
   461  	case tabStatusCount[acceptable] > 0:
   462  		overallStatus = acceptable
   463  	case tabStatusCount[passing] > 0:
   464  		overallStatus = passing
   465  	}
   466  
   467  	return &apipb.DashboardSummary{
   468  		Name:           dashboardName,
   469  		OverallStatus:  overallStatus,
   470  		TabStatusCount: tabStatusCount,
   471  	}
   472  }