github.com/kubeshop/testkube@v1.17.23/pkg/agent/testworkflows.go (about) 1 package agent 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "math" 8 "time" 9 10 "github.com/pkg/errors" 11 "golang.org/x/sync/errgroup" 12 "google.golang.org/grpc" 13 "google.golang.org/grpc/encoding/gzip" 14 15 "github.com/kubeshop/testkube/pkg/api/v1/testkube" 16 "github.com/kubeshop/testkube/pkg/cloud" 17 ) 18 19 const testWorkflowNotificationsRetryCount = 10 20 21 func getTestWorkflowNotificationType(n testkube.TestWorkflowExecutionNotification) cloud.TestWorkflowNotificationType { 22 if n.Result != nil { 23 return cloud.TestWorkflowNotificationType_WORKFLOW_STREAM_RESULT 24 } else if n.Output != nil { 25 return cloud.TestWorkflowNotificationType_WORKFLOW_STREAM_OUTPUT 26 } 27 return cloud.TestWorkflowNotificationType_WORKFLOW_STREAM_LOG 28 } 29 30 func (ag *Agent) runTestWorkflowNotificationsLoop(ctx context.Context) error { 31 ctx = AddAPIKeyMeta(ctx, ag.apiKey) 32 33 ag.logger.Infow("initiating workflow notifications streaming connection with Cloud API") 34 // creates a new Stream from the client side. ctx is used for the lifetime of the stream. 35 opts := []grpc.CallOption{grpc.UseCompressor(gzip.Name), grpc.MaxCallRecvMsgSize(math.MaxInt32)} 36 stream, err := ag.client.GetTestWorkflowNotificationsStream(ctx, opts...) 37 if err != nil { 38 ag.logger.Errorf("failed to execute: %w", err) 39 return errors.Wrap(err, "failed to setup stream") 40 } 41 42 // GRPC stream have special requirements for concurrency on SendMsg, and RecvMsg calls. 43 // Please check https://github.com/grpc/grpc-go/blob/master/Documentation/concurrency.md 44 g, groupCtx := errgroup.WithContext(ctx) 45 g.Go(func() error { 46 for { 47 cmd, err := ag.receiveTestWorkflowNotificationsRequest(groupCtx, stream) 48 if err != nil { 49 return err 50 } 51 52 ag.testWorkflowNotificationsRequestBuffer <- cmd 53 } 54 }) 55 56 g.Go(func() error { 57 for { 58 select { 59 case resp := <-ag.testWorkflowNotificationsResponseBuffer: 60 err := ag.sendTestWorkflowNotificationsResponse(groupCtx, stream, resp) 61 if err != nil { 62 return err 63 } 64 case <-groupCtx.Done(): 65 return groupCtx.Err() 66 } 67 } 68 }) 69 70 err = g.Wait() 71 72 return err 73 } 74 75 func (ag *Agent) runTestWorkflowNotificationsWorker(ctx context.Context, numWorkers int) error { 76 g, groupCtx := errgroup.WithContext(ctx) 77 for i := 0; i < numWorkers; i++ { 78 g.Go(func() error { 79 for { 80 select { 81 case req := <-ag.testWorkflowNotificationsRequestBuffer: 82 if req.RequestType == cloud.TestWorkflowNotificationsRequestType_WORKFLOW_STREAM_HEALTH_CHECK { 83 ag.testWorkflowNotificationsResponseBuffer <- &cloud.TestWorkflowNotificationsResponse{ 84 StreamId: req.StreamId, 85 SeqNo: 0, 86 } 87 break 88 } 89 90 err := ag.executeWorkflowNotificationsRequest(groupCtx, req) 91 if err != nil { 92 ag.logger.Errorf("error executing workflow notifications request: %s", err.Error()) 93 } 94 case <-groupCtx.Done(): 95 return groupCtx.Err() 96 } 97 } 98 }) 99 } 100 return g.Wait() 101 } 102 103 func (ag *Agent) executeWorkflowNotificationsRequest(ctx context.Context, req *cloud.TestWorkflowNotificationsRequest) error { 104 notificationsCh, err := ag.testWorkflowNotificationsFunc(ctx, req.ExecutionId) 105 for i := 0; i < testWorkflowNotificationsRetryCount; i++ { 106 if err != nil { 107 // We have a race condition here 108 // Cloud sometimes slow to insert execution or test 109 // while WorkflowNotifications request from websockets comes in faster 110 // so we retry up to testWorkflowNotificationsRetryCount times. 111 time.Sleep(100 * time.Millisecond) 112 notificationsCh, err = ag.testWorkflowNotificationsFunc(ctx, req.ExecutionId) 113 } 114 } 115 if err != nil { 116 message := fmt.Sprintf("cannot get pod logs: %s", err.Error()) 117 ag.testWorkflowNotificationsResponseBuffer <- &cloud.TestWorkflowNotificationsResponse{ 118 StreamId: req.StreamId, 119 SeqNo: 0, 120 Type: cloud.TestWorkflowNotificationType_WORKFLOW_STREAM_ERROR, 121 Message: message, 122 } 123 return nil 124 } 125 126 for { 127 var i uint32 128 select { 129 case n, ok := <-notificationsCh: 130 if !ok { 131 return nil 132 } 133 t := getTestWorkflowNotificationType(n) 134 msg := &cloud.TestWorkflowNotificationsResponse{ 135 StreamId: req.StreamId, 136 SeqNo: i, 137 Timestamp: n.Ts.Format(time.RFC3339Nano), 138 Ref: n.Ref, 139 Type: t, 140 } 141 if n.Result != nil { 142 m, _ := json.Marshal(n.Result) 143 msg.Message = string(m) 144 } else if n.Output != nil { 145 m, _ := json.Marshal(n.Output) 146 msg.Message = string(m) 147 } else { 148 msg.Message = n.Log 149 } 150 i++ 151 152 select { 153 case ag.testWorkflowNotificationsResponseBuffer <- msg: 154 case <-ctx.Done(): 155 return ctx.Err() 156 } 157 case <-ctx.Done(): 158 return ctx.Err() 159 } 160 } 161 } 162 163 func (ag *Agent) receiveTestWorkflowNotificationsRequest(ctx context.Context, stream cloud.TestKubeCloudAPI_GetTestWorkflowNotificationsStreamClient) (*cloud.TestWorkflowNotificationsRequest, error) { 164 respChan := make(chan testWorkflowNotificationsRequest, 1) 165 go func() { 166 cmd, err := stream.Recv() 167 respChan <- testWorkflowNotificationsRequest{resp: cmd, err: err} 168 }() 169 170 var cmd *cloud.TestWorkflowNotificationsRequest 171 select { 172 case resp := <-respChan: 173 cmd = resp.resp 174 err := resp.err 175 176 if err != nil { 177 ag.logger.Errorf("agent stream receive: %v", err) 178 return nil, err 179 } 180 case <-ctx.Done(): 181 return nil, ctx.Err() 182 } 183 184 return cmd, nil 185 } 186 187 type testWorkflowNotificationsRequest struct { 188 resp *cloud.TestWorkflowNotificationsRequest 189 err error 190 } 191 192 func (ag *Agent) sendTestWorkflowNotificationsResponse(ctx context.Context, stream cloud.TestKubeCloudAPI_GetTestWorkflowNotificationsStreamClient, resp *cloud.TestWorkflowNotificationsResponse) error { 193 errChan := make(chan error, 1) 194 go func() { 195 errChan <- stream.Send(resp) 196 close(errChan) 197 }() 198 199 t := time.NewTimer(ag.sendTimeout) 200 select { 201 case err := <-errChan: 202 if !t.Stop() { 203 <-t.C 204 } 205 return err 206 case <-ctx.Done(): 207 if !t.Stop() { 208 <-t.C 209 } 210 211 return ctx.Err() 212 case <-t.C: 213 return errors.New("send response too slow") 214 } 215 }