github.com/GoogleCloudPlatform/testgrid@v0.0.174/pkg/updater/pubsub.go (about) 1 /* 2 Copyright 2021 The TestGrid Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package updater 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "math/rand" 24 "path" 25 "sort" 26 "strings" 27 "sync" 28 "time" 29 30 "github.com/GoogleCloudPlatform/testgrid/config" 31 configpb "github.com/GoogleCloudPlatform/testgrid/pb/config" 32 "github.com/GoogleCloudPlatform/testgrid/pkg/pubsub" 33 "github.com/GoogleCloudPlatform/testgrid/util/gcs" 34 "github.com/sirupsen/logrus" 35 ) 36 37 // FixGCS listens for changes to GCS files and schedules another update of those groups ~immediately. 38 // 39 // Limited to test groups with a gcs_config result_source that includes pubsub info. 40 // Returns when the context is canceled or a processing error occurs. 41 func FixGCS(subscriber pubsub.Subscriber) Fixer { 42 return func(ctx context.Context, log logrus.FieldLogger, q *config.TestGroupQueue, groups []*configpb.TestGroup) error { 43 paths, subs, err := gcsSubscribedPaths(groups) 44 if err != nil { 45 return fmt.Errorf("group paths: %v", err) 46 } 47 ctx, cancel := context.WithCancel(ctx) 48 defer cancel() 49 var wg sync.WaitGroup 50 ch := make(chan *pubsub.Notification) 51 wg.Add(1) 52 go func() { 53 defer wg.Done() 54 subscribeGCS(ctx, log, subscriber, ch, subs...) 55 }() 56 return processGCSNotifications(ctx, log, q, paths, ch) 57 } 58 } 59 60 func gcsSubscribedPaths(tgs []*configpb.TestGroup) (map[gcs.Path][]string, []subscription, error) { 61 paths := make(map[gcs.Path][]string, len(tgs)) 62 subscriptions := map[subscription]bool{} 63 64 for _, tg := range tgs { 65 sub := groupSubscription(tg) 66 if sub == nil { 67 continue 68 } 69 subscriptions[*sub] = true 70 name := tg.Name 71 gps, err := groupPaths(tg) 72 if err != nil { 73 return nil, nil, fmt.Errorf("%s: %v", name, err) 74 } 75 for _, gp := range gps { 76 paths[gp] = append(paths[gp], name) 77 } 78 } 79 var subs []subscription 80 if n := len(subscriptions); n > 0 { 81 subs = make([]subscription, 0, n) 82 for sub := range subscriptions { 83 subs = append(subs, sub) 84 } 85 } 86 return paths, subs, nil 87 } 88 89 func groupSubscription(tg *configpb.TestGroup) *subscription { 90 cfg := tg.GetResultSource().GetGcsConfig() 91 if cfg == nil { 92 return manualGroupSubscription(tg) 93 } 94 proj, sub := cfg.PubsubProject, cfg.PubsubSubscription 95 if proj == "" || sub == "" { 96 return nil 97 } 98 return &subscription{proj, sub} 99 } 100 101 var manualSubs map[string]subscription 102 103 // AddManualSubscription allows injecting additional subscriptions that are not 104 // specified by the test group itself. 105 // 106 // Likely to be removed (or migrated into the config.proto) in a future version. 107 func AddManualSubscription(projID, subID, prefix string) { 108 if manualSubs == nil { 109 manualSubs = map[string]subscription{} 110 } 111 manualSubs[prefix] = subscription{projID, subID} 112 } 113 114 func manualGroupSubscription(tg *configpb.TestGroup) *subscription { 115 gp := gcsPrefix(tg) 116 for prefix, sub := range manualSubs { 117 if strings.HasPrefix(gp, prefix) { 118 return &sub 119 } 120 } 121 return nil 122 } 123 124 type subscription struct { 125 proj string 126 sub string 127 } 128 129 func (s subscription) String() string { 130 return fmt.Sprintf("pubsub://%s/%s", s.proj, s.sub) 131 } 132 133 // Begin sending notifications for this subscription to the channel. 134 // 135 // Automatically cancels an existing routine listening to this subscription. 136 func subscribeGCS(ctx context.Context, log logrus.FieldLogger, client pubsub.Subscriber, receivers chan<- *pubsub.Notification, subs ...subscription) { 137 var wg sync.WaitGroup 138 wg.Add(len(subs)) 139 defer wg.Wait() 140 for _, sub := range subs { 141 log := log.WithField("subscription", sub.String()) 142 projID, subID := sub.proj, sub.sub 143 log.Debug("Subscribed to GCS changes") 144 go func() { 145 defer wg.Done() 146 for { 147 err := pubsub.SendGCS(ctx, log, client, projID, subID, nil, receivers) 148 if err == nil { 149 return 150 } 151 if errors.Is(err, context.Canceled) || ctx.Err() != nil { 152 log.WithError(err).Trace("Subscription canceled") 153 return 154 } 155 sleep := time.Minute + time.Duration(rand.Int63n(int64(time.Minute))) 156 log.WithError(err).WithField("sleep", sleep).Error("Error receiving GCS notifications, will retry...") 157 time.Sleep(sleep) 158 } 159 }() 160 } 161 } 162 163 var ( 164 timeNow = func() time.Time { return time.Now() } 165 ) 166 167 func processGCSNotifications(ctx context.Context, log logrus.FieldLogger, q *config.TestGroupQueue, paths map[gcs.Path][]string, senders <-chan *pubsub.Notification) error { 168 for { 169 select { 170 case <-ctx.Done(): 171 return ctx.Err() 172 case notice := <-senders: 173 groups, delay := processNotification(paths, notice) 174 if len(groups) == 0 { 175 break 176 } 177 when := notice.Time.Add(delay) 178 if time.Until(when) < 0 { 179 when = timeNow() 180 } 181 log.WithFields(logrus.Fields{ 182 "groups": groups, 183 "when": when, 184 "notification": notice, 185 }).Trace("Fixing groups from gcs notifcation") 186 if len(groups) == 1 { 187 name := groups[0] 188 if err := q.Fix(name, when, false); err != nil { 189 return fmt.Errorf("fix %q: %w", name, err) 190 } 191 continue 192 } 193 whens := make(map[string]time.Time, len(groups)) 194 for _, g := range groups { 195 whens[g] = when 196 } 197 if err := q.FixAll(whens, false); err != nil { 198 return fmt.Errorf("fix all: %w", err) 199 } 200 } 201 } 202 } 203 204 var namedDurations = map[string]time.Duration{ 205 "podinfo.json": 30 * time.Second, // Done 206 "finished.json": 5 * time.Minute, // Container done, wait for prowjob to finish 207 "metadata.json": 5 * time.Minute, // Should finish soon 208 "started.json": 30 * time.Second, // Running 209 } 210 211 // Try to balance providing up-to-date info with minimal redundant processing. 212 // In particular, when the job finishes the sidecar container will upload: 213 // * a bunch of junit files, then finished.json 214 // Soon after this crier will notice the prowjob has been finalized and the gcsreporter should: 215 // * upload podinfo.json 216 // 217 // Ideally in this scenario we give the system time to upload everything and process this data once. 218 func processNotification(paths map[gcs.Path][]string, n *pubsub.Notification) ([]string, time.Duration) { 219 var out []string 220 b, obj := n.Path.Bucket(), n.Path.Object() 221 base := path.Base(obj) 222 dur, ok := namedDurations[base] 223 if !ok { // Maybe it is an interesting file 224 switch { 225 case strings.HasPrefix(base, "junit") && strings.HasSuffix(base, ".xml"): // row data 226 dur = 5 * time.Minute 227 case strings.HasSuffix(base, ".txt") && strings.Contains(obj, "directory/"): // symlink to actual data 228 dur = 5 * time.Minute 229 default: 230 return nil, 0 231 } 232 } 233 234 for path, groups := range paths { 235 if path.Bucket() != b { 236 continue 237 } 238 if !strings.HasPrefix(obj, path.Object()) { 239 continue 240 } 241 out = append(out, groups...) 242 } 243 if len(out) == 0 { 244 return nil, 0 245 } 246 sort.Strings(out) 247 return out, dur 248 }