github.com/kubeshop/testkube@v1.17.23/pkg/agent/testworkflows.go (about)

     1  package agent
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"math"
     8  	"time"
     9  
    10  	"github.com/pkg/errors"
    11  	"golang.org/x/sync/errgroup"
    12  	"google.golang.org/grpc"
    13  	"google.golang.org/grpc/encoding/gzip"
    14  
    15  	"github.com/kubeshop/testkube/pkg/api/v1/testkube"
    16  	"github.com/kubeshop/testkube/pkg/cloud"
    17  )
    18  
// testWorkflowNotificationsRetryCount bounds how many additional attempts are
// made to open the notifications channel for an execution after the first
// attempt has failed.
const testWorkflowNotificationsRetryCount = 10
    20  
    21  func getTestWorkflowNotificationType(n testkube.TestWorkflowExecutionNotification) cloud.TestWorkflowNotificationType {
    22  	if n.Result != nil {
    23  		return cloud.TestWorkflowNotificationType_WORKFLOW_STREAM_RESULT
    24  	} else if n.Output != nil {
    25  		return cloud.TestWorkflowNotificationType_WORKFLOW_STREAM_OUTPUT
    26  	}
    27  	return cloud.TestWorkflowNotificationType_WORKFLOW_STREAM_LOG
    28  }
    29  
    30  func (ag *Agent) runTestWorkflowNotificationsLoop(ctx context.Context) error {
    31  	ctx = AddAPIKeyMeta(ctx, ag.apiKey)
    32  
    33  	ag.logger.Infow("initiating workflow notifications streaming connection with Cloud API")
    34  	// creates a new Stream from the client side. ctx is used for the lifetime of the stream.
    35  	opts := []grpc.CallOption{grpc.UseCompressor(gzip.Name), grpc.MaxCallRecvMsgSize(math.MaxInt32)}
    36  	stream, err := ag.client.GetTestWorkflowNotificationsStream(ctx, opts...)
    37  	if err != nil {
    38  		ag.logger.Errorf("failed to execute: %w", err)
    39  		return errors.Wrap(err, "failed to setup stream")
    40  	}
    41  
    42  	// GRPC stream have special requirements for concurrency on SendMsg, and RecvMsg calls.
    43  	// Please check https://github.com/grpc/grpc-go/blob/master/Documentation/concurrency.md
    44  	g, groupCtx := errgroup.WithContext(ctx)
    45  	g.Go(func() error {
    46  		for {
    47  			cmd, err := ag.receiveTestWorkflowNotificationsRequest(groupCtx, stream)
    48  			if err != nil {
    49  				return err
    50  			}
    51  
    52  			ag.testWorkflowNotificationsRequestBuffer <- cmd
    53  		}
    54  	})
    55  
    56  	g.Go(func() error {
    57  		for {
    58  			select {
    59  			case resp := <-ag.testWorkflowNotificationsResponseBuffer:
    60  				err := ag.sendTestWorkflowNotificationsResponse(groupCtx, stream, resp)
    61  				if err != nil {
    62  					return err
    63  				}
    64  			case <-groupCtx.Done():
    65  				return groupCtx.Err()
    66  			}
    67  		}
    68  	})
    69  
    70  	err = g.Wait()
    71  
    72  	return err
    73  }
    74  
    75  func (ag *Agent) runTestWorkflowNotificationsWorker(ctx context.Context, numWorkers int) error {
    76  	g, groupCtx := errgroup.WithContext(ctx)
    77  	for i := 0; i < numWorkers; i++ {
    78  		g.Go(func() error {
    79  			for {
    80  				select {
    81  				case req := <-ag.testWorkflowNotificationsRequestBuffer:
    82  					if req.RequestType == cloud.TestWorkflowNotificationsRequestType_WORKFLOW_STREAM_HEALTH_CHECK {
    83  						ag.testWorkflowNotificationsResponseBuffer <- &cloud.TestWorkflowNotificationsResponse{
    84  							StreamId: req.StreamId,
    85  							SeqNo:    0,
    86  						}
    87  						break
    88  					}
    89  
    90  					err := ag.executeWorkflowNotificationsRequest(groupCtx, req)
    91  					if err != nil {
    92  						ag.logger.Errorf("error executing workflow notifications request: %s", err.Error())
    93  					}
    94  				case <-groupCtx.Done():
    95  					return groupCtx.Err()
    96  				}
    97  			}
    98  		})
    99  	}
   100  	return g.Wait()
   101  }
   102  
   103  func (ag *Agent) executeWorkflowNotificationsRequest(ctx context.Context, req *cloud.TestWorkflowNotificationsRequest) error {
   104  	notificationsCh, err := ag.testWorkflowNotificationsFunc(ctx, req.ExecutionId)
   105  	for i := 0; i < testWorkflowNotificationsRetryCount; i++ {
   106  		if err != nil {
   107  			// We have a race condition here
   108  			// Cloud sometimes slow to insert execution or test
   109  			// while WorkflowNotifications request from websockets comes in faster
   110  			// so we retry up to testWorkflowNotificationsRetryCount times.
   111  			time.Sleep(100 * time.Millisecond)
   112  			notificationsCh, err = ag.testWorkflowNotificationsFunc(ctx, req.ExecutionId)
   113  		}
   114  	}
   115  	if err != nil {
   116  		message := fmt.Sprintf("cannot get pod logs: %s", err.Error())
   117  		ag.testWorkflowNotificationsResponseBuffer <- &cloud.TestWorkflowNotificationsResponse{
   118  			StreamId: req.StreamId,
   119  			SeqNo:    0,
   120  			Type:     cloud.TestWorkflowNotificationType_WORKFLOW_STREAM_ERROR,
   121  			Message:  message,
   122  		}
   123  		return nil
   124  	}
   125  
   126  	for {
   127  		var i uint32
   128  		select {
   129  		case n, ok := <-notificationsCh:
   130  			if !ok {
   131  				return nil
   132  			}
   133  			t := getTestWorkflowNotificationType(n)
   134  			msg := &cloud.TestWorkflowNotificationsResponse{
   135  				StreamId:  req.StreamId,
   136  				SeqNo:     i,
   137  				Timestamp: n.Ts.Format(time.RFC3339Nano),
   138  				Ref:       n.Ref,
   139  				Type:      t,
   140  			}
   141  			if n.Result != nil {
   142  				m, _ := json.Marshal(n.Result)
   143  				msg.Message = string(m)
   144  			} else if n.Output != nil {
   145  				m, _ := json.Marshal(n.Output)
   146  				msg.Message = string(m)
   147  			} else {
   148  				msg.Message = n.Log
   149  			}
   150  			i++
   151  
   152  			select {
   153  			case ag.testWorkflowNotificationsResponseBuffer <- msg:
   154  			case <-ctx.Done():
   155  				return ctx.Err()
   156  			}
   157  		case <-ctx.Done():
   158  			return ctx.Err()
   159  		}
   160  	}
   161  }
   162  
   163  func (ag *Agent) receiveTestWorkflowNotificationsRequest(ctx context.Context, stream cloud.TestKubeCloudAPI_GetTestWorkflowNotificationsStreamClient) (*cloud.TestWorkflowNotificationsRequest, error) {
   164  	respChan := make(chan testWorkflowNotificationsRequest, 1)
   165  	go func() {
   166  		cmd, err := stream.Recv()
   167  		respChan <- testWorkflowNotificationsRequest{resp: cmd, err: err}
   168  	}()
   169  
   170  	var cmd *cloud.TestWorkflowNotificationsRequest
   171  	select {
   172  	case resp := <-respChan:
   173  		cmd = resp.resp
   174  		err := resp.err
   175  
   176  		if err != nil {
   177  			ag.logger.Errorf("agent stream receive: %v", err)
   178  			return nil, err
   179  		}
   180  	case <-ctx.Done():
   181  		return nil, ctx.Err()
   182  	}
   183  
   184  	return cmd, nil
   185  }
   186  
// testWorkflowNotificationsRequest bundles the two return values of a stream
// Recv call so they can be passed over a single channel.
type testWorkflowNotificationsRequest struct {
	// resp is the request received from the stream.
	resp *cloud.TestWorkflowNotificationsRequest
	// err is the error returned by the receive call.
	err  error
}
   191  
   192  func (ag *Agent) sendTestWorkflowNotificationsResponse(ctx context.Context, stream cloud.TestKubeCloudAPI_GetTestWorkflowNotificationsStreamClient, resp *cloud.TestWorkflowNotificationsResponse) error {
   193  	errChan := make(chan error, 1)
   194  	go func() {
   195  		errChan <- stream.Send(resp)
   196  		close(errChan)
   197  	}()
   198  
   199  	t := time.NewTimer(ag.sendTimeout)
   200  	select {
   201  	case err := <-errChan:
   202  		if !t.Stop() {
   203  			<-t.C
   204  		}
   205  		return err
   206  	case <-ctx.Done():
   207  		if !t.Stop() {
   208  			<-t.C
   209  		}
   210  
   211  		return ctx.Err()
   212  	case <-t.C:
   213  		return errors.New("send response too slow")
   214  	}
   215  }