github.com/GoogleCloudPlatform/testgrid@v0.0.174/pkg/updater/pubsub.go (about)

     1  /*
     2  Copyright 2021 The TestGrid Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package updater
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"math/rand"
    24  	"path"
    25  	"sort"
    26  	"strings"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/GoogleCloudPlatform/testgrid/config"
    31  	configpb "github.com/GoogleCloudPlatform/testgrid/pb/config"
    32  	"github.com/GoogleCloudPlatform/testgrid/pkg/pubsub"
    33  	"github.com/GoogleCloudPlatform/testgrid/util/gcs"
    34  	"github.com/sirupsen/logrus"
    35  )
    36  
    37  // FixGCS listens for changes to GCS files and schedules another update of those groups ~immediately.
    38  //
    39  // Limited to test groups with a gcs_config result_source that includes pubsub info.
    40  // Returns when the context is canceled or a processing error occurs.
    41  func FixGCS(subscriber pubsub.Subscriber) Fixer {
    42  	return func(ctx context.Context, log logrus.FieldLogger, q *config.TestGroupQueue, groups []*configpb.TestGroup) error {
    43  		paths, subs, err := gcsSubscribedPaths(groups)
    44  		if err != nil {
    45  			return fmt.Errorf("group paths: %v", err)
    46  		}
    47  		ctx, cancel := context.WithCancel(ctx)
    48  		defer cancel()
    49  		var wg sync.WaitGroup
    50  		ch := make(chan *pubsub.Notification)
    51  		wg.Add(1)
    52  		go func() {
    53  			defer wg.Done()
    54  			subscribeGCS(ctx, log, subscriber, ch, subs...)
    55  		}()
    56  		return processGCSNotifications(ctx, log, q, paths, ch)
    57  	}
    58  }
    59  
    60  func gcsSubscribedPaths(tgs []*configpb.TestGroup) (map[gcs.Path][]string, []subscription, error) {
    61  	paths := make(map[gcs.Path][]string, len(tgs))
    62  	subscriptions := map[subscription]bool{}
    63  
    64  	for _, tg := range tgs {
    65  		sub := groupSubscription(tg)
    66  		if sub == nil {
    67  			continue
    68  		}
    69  		subscriptions[*sub] = true
    70  		name := tg.Name
    71  		gps, err := groupPaths(tg)
    72  		if err != nil {
    73  			return nil, nil, fmt.Errorf("%s: %v", name, err)
    74  		}
    75  		for _, gp := range gps {
    76  			paths[gp] = append(paths[gp], name)
    77  		}
    78  	}
    79  	var subs []subscription
    80  	if n := len(subscriptions); n > 0 {
    81  		subs = make([]subscription, 0, n)
    82  		for sub := range subscriptions {
    83  			subs = append(subs, sub)
    84  		}
    85  	}
    86  	return paths, subs, nil
    87  }
    88  
    89  func groupSubscription(tg *configpb.TestGroup) *subscription {
    90  	cfg := tg.GetResultSource().GetGcsConfig()
    91  	if cfg == nil {
    92  		return manualGroupSubscription(tg)
    93  	}
    94  	proj, sub := cfg.PubsubProject, cfg.PubsubSubscription
    95  	if proj == "" || sub == "" {
    96  		return nil
    97  	}
    98  	return &subscription{proj, sub}
    99  }
   100  
   101  var manualSubs map[string]subscription
   102  
   103  // AddManualSubscription allows injecting additional subscriptions that are not
   104  // specified by the test group itself.
   105  //
   106  // Likely to be removed (or migrated into the config.proto) in a future version.
   107  func AddManualSubscription(projID, subID, prefix string) {
   108  	if manualSubs == nil {
   109  		manualSubs = map[string]subscription{}
   110  	}
   111  	manualSubs[prefix] = subscription{projID, subID}
   112  }
   113  
   114  func manualGroupSubscription(tg *configpb.TestGroup) *subscription {
   115  	gp := gcsPrefix(tg)
   116  	for prefix, sub := range manualSubs {
   117  		if strings.HasPrefix(gp, prefix) {
   118  			return &sub
   119  		}
   120  	}
   121  	return nil
   122  }
   123  
   124  type subscription struct {
   125  	proj string
   126  	sub  string
   127  }
   128  
   129  func (s subscription) String() string {
   130  	return fmt.Sprintf("pubsub://%s/%s", s.proj, s.sub)
   131  }
   132  
   133  // Begin sending notifications for this subscription to the channel.
   134  //
   135  // Automatically cancels an existing routine listening to this subscription.
   136  func subscribeGCS(ctx context.Context, log logrus.FieldLogger, client pubsub.Subscriber, receivers chan<- *pubsub.Notification, subs ...subscription) {
   137  	var wg sync.WaitGroup
   138  	wg.Add(len(subs))
   139  	defer wg.Wait()
   140  	for _, sub := range subs {
   141  		log := log.WithField("subscription", sub.String())
   142  		projID, subID := sub.proj, sub.sub
   143  		log.Debug("Subscribed to GCS changes")
   144  		go func() {
   145  			defer wg.Done()
   146  			for {
   147  				err := pubsub.SendGCS(ctx, log, client, projID, subID, nil, receivers)
   148  				if err == nil {
   149  					return
   150  				}
   151  				if errors.Is(err, context.Canceled) || ctx.Err() != nil {
   152  					log.WithError(err).Trace("Subscription canceled")
   153  					return
   154  				}
   155  				sleep := time.Minute + time.Duration(rand.Int63n(int64(time.Minute)))
   156  				log.WithError(err).WithField("sleep", sleep).Error("Error receiving GCS notifications, will retry...")
   157  				time.Sleep(sleep)
   158  			}
   159  		}()
   160  	}
   161  }
   162  
   163  var (
   164  	timeNow = func() time.Time { return time.Now() }
   165  )
   166  
   167  func processGCSNotifications(ctx context.Context, log logrus.FieldLogger, q *config.TestGroupQueue, paths map[gcs.Path][]string, senders <-chan *pubsub.Notification) error {
   168  	for {
   169  		select {
   170  		case <-ctx.Done():
   171  			return ctx.Err()
   172  		case notice := <-senders:
   173  			groups, delay := processNotification(paths, notice)
   174  			if len(groups) == 0 {
   175  				break
   176  			}
   177  			when := notice.Time.Add(delay)
   178  			if time.Until(when) < 0 {
   179  				when = timeNow()
   180  			}
   181  			log.WithFields(logrus.Fields{
   182  				"groups":       groups,
   183  				"when":         when,
   184  				"notification": notice,
   185  			}).Trace("Fixing groups from gcs notifcation")
   186  			if len(groups) == 1 {
   187  				name := groups[0]
   188  				if err := q.Fix(name, when, false); err != nil {
   189  					return fmt.Errorf("fix %q: %w", name, err)
   190  				}
   191  				continue
   192  			}
   193  			whens := make(map[string]time.Time, len(groups))
   194  			for _, g := range groups {
   195  				whens[g] = when
   196  			}
   197  			if err := q.FixAll(whens, false); err != nil {
   198  				return fmt.Errorf("fix all: %w", err)
   199  			}
   200  		}
   201  	}
   202  }
   203  
   204  var namedDurations = map[string]time.Duration{
   205  	"podinfo.json":  30 * time.Second, // Done
   206  	"finished.json": 5 * time.Minute,  // Container done, wait for prowjob to finish
   207  	"metadata.json": 5 * time.Minute,  // Should finish soon
   208  	"started.json":  30 * time.Second, // Running
   209  }
   210  
   211  // Try to balance providing up-to-date info with minimal redundant processing.
   212  // In particular, when the job finishes the sidecar container will upload:
   213  // * a bunch of junit files, then finished.json
   214  // Soon after this crier will notice the prowjob has been finalized and the gcsreporter should:
   215  // * upload podinfo.json
   216  //
   217  // Ideally in this scenario we give the system time to upload everything and process this data once.
   218  func processNotification(paths map[gcs.Path][]string, n *pubsub.Notification) ([]string, time.Duration) {
   219  	var out []string
   220  	b, obj := n.Path.Bucket(), n.Path.Object()
   221  	base := path.Base(obj)
   222  	dur, ok := namedDurations[base]
   223  	if !ok { // Maybe it is an interesting file
   224  		switch {
   225  		case strings.HasPrefix(base, "junit") && strings.HasSuffix(base, ".xml"): // row data
   226  			dur = 5 * time.Minute
   227  		case strings.HasSuffix(base, ".txt") && strings.Contains(obj, "directory/"): // symlink to actual data
   228  			dur = 5 * time.Minute
   229  		default:
   230  			return nil, 0
   231  		}
   232  	}
   233  
   234  	for path, groups := range paths {
   235  		if path.Bucket() != b {
   236  			continue
   237  		}
   238  		if !strings.HasPrefix(obj, path.Object()) {
   239  			continue
   240  		}
   241  		out = append(out, groups...)
   242  	}
   243  	if len(out) == 0 {
   244  		return nil, 0
   245  	}
   246  	sort.Strings(out)
   247  	return out, dur
   248  }