github.com/kubeshop/testkube@v1.17.23/pkg/logs/service.go

// TODO how to handle pod issues here?
// TODO how to know that there is a topic which is not handled by any subscriber?
// TODO we would need to check pending log topics and handle them after a restart in case of a log pod disaster

package logs

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"fmt"
	"math/rand"
	"net"
	"net/http"
	"os"
	"sync"
	"time"

	"github.com/nats-io/nats.go"
	"github.com/nats-io/nats.go/jetstream"
	"go.uber.org/zap"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"

	"github.com/kubeshop/testkube/pkg/log"
	"github.com/kubeshop/testkube/pkg/logs/adapter"
	"github.com/kubeshop/testkube/pkg/logs/client"
	"github.com/kubeshop/testkube/pkg/logs/pb"
	"github.com/kubeshop/testkube/pkg/logs/repository"
	"github.com/kubeshop/testkube/pkg/logs/state"
)

const (
	DefaultHttpAddress = ":8080"
	DefaultGrpcAddress = ":9090"

	defaultStopPauseInterval = 200 * time.Millisecond
)

// NewLogsService creates a new logs service wired to the given NATS connection,
// JetStream instance, logs state manager and log stream client.
func NewLogsService(nats *nats.Conn, js jetstream.JetStream, state state.Interface, stream client.Stream) *LogsService {
	return &LogsService{
		nats:              nats,
		adapters:          []adapter.Adapter{},
		js:                js,
		log:               log.DefaultLogger.With("service", "logs-service"),
		Ready:             make(chan struct{}, 1),
		httpAddress:       DefaultHttpAddress,
		grpcAddress:       DefaultGrpcAddress,
		consumerInstances: sync.Map{},
		state:             state,
		stopPauseInterval: defaultStopPauseInterval,
		logStream:         stream,
	}
}
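
// The sketch below is illustrative only (not part of the original source): it shows how the
// constructor above and the builder-style With* methods further down are expected to be
// combined. All parameters stand in for dependencies assumed to be created elsewhere
// (NATS/JetStream connections, state manager, stream client, repository factory, adapter).
func exampleNewService(nc *nats.Conn, js jetstream.JetStream, st state.Interface, stream client.Stream, f repository.Factory, a adapter.Adapter) *LogsService {
	svc := NewLogsService(nc, js, st, stream).
		WithGrpcAddress(DefaultGrpcAddress).
		WithHttpAddress(DefaultHttpAddress).
		WithMessageTracing(true).
		WithLogsRepositoryFactory(f)

	// adapters decide where consumed log chunks end up (e.g. cloud or local storage)
	svc.AddAdapter(a)

	return svc
}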

type LogsService struct {
	logsRepositoryFactory repository.Factory
	log                   *zap.SugaredLogger
	nats                  *nats.Conn
	js                    jetstream.JetStream
	adapters              []adapter.Adapter

	// logStream manages logs streams and sends data to them
	logStream client.Stream

	Ready chan struct{}

	// grpcAddress is the address of the gRPC server
	grpcAddress string
	// grpcServer is the gRPC server for the logs service
	grpcServer *grpc.Server

	// httpAddress is the address of the Kubernetes HTTP health check handler
	httpAddress string
	// httpServer is the HTTP server for health checks (for Kubernetes below 1.25)
	httpServer *http.Server

	// consumerInstances is an internal executionID => Consumer map which we need to clean up;
	// each pod can have a different set of executionID consumers
	consumerInstances sync.Map

	// state keeps the logs state (pending, finished);
	// it allows distinguishing where to load data from in OSS,
	// while cloud always loads it locally
	state state.Interface

	// stopPauseInterval is the wait time for messages to cool down before stopping
	stopPauseInterval time.Duration

	// traceMessages enables tracing of incoming messages
	traceMessages bool
}

// AddAdapter adds a new adapter to the logs service; adapters will be configured based on the given mode,
// e.g. cloud mode will get the cloud adapter to store logs directly in the cloud
func (ls *LogsService) AddAdapter(a adapter.Adapter) {
	ls.adapters = append(ls.adapters, a)
}

// Run handles start and stop events from NATS and blocks until the given context is done.
func (ls *LogsService) Run(ctx context.Context) (err error) {
	// Handle start and stop events from NATS;
	// we assume that after a start event something is pushing data to the stream,
	// it can be our handler or some other service
	go ls.metrics()

	// For a start event we must build a stream for the given execution id and start consuming it.
	// This subscription must follow a queue group so each pod gets its own bunch of executions to handle.
	// The start event will be triggered by the logs process controller (scheduler);
	// group is the common name for both start and stop subjects.
	for group, subject := range StartSubjects {
		if _, err := ls.nats.QueueSubscribe(subject, StartQueue, ls.handleStart(ctx, group)); err != nil {
			return err
		}
	}

	// Listen on all pods as we don't control which one will have a given consumer.
	// The stop event will be triggered by the logs process controller (scheduler).
	for group, subject := range StopSubjects {
		if _, err := ls.nats.Subscribe(subject, ls.handleStop(ctx, group)); err != nil {
			return err
		}
	}

	// Send ready signal
	ls.Ready <- struct{}{}

	// block the main routine until the context is done
	<-ctx.Done()

	return nil
}
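
// The sketch below is illustrative only (not part of the original source): it shows how a
// caller is expected to drive Run: start it in a goroutine and wait on the Ready channel
// before relying on the subscriptions being in place.
func exampleRun(ctx context.Context, svc *LogsService) error {
	errCh := make(chan error, 1)
	go func() { errCh <- svc.Run(ctx) }()

	select {
	case <-svc.Ready:
		// start/stop subscriptions are registered, the service is operational
		return nil
	case err := <-errCh:
		// Run returned before signalling readiness (e.g. a subscription failed)
		return err
	}
}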

// metrics periodically logs the number of active consumer instances
func (ls *LogsService) metrics() {
	for {
		count := 0
		ls.consumerInstances.Range(func(_, _ interface{}) bool {
			count++
			return true
		})
		ls.log.Infow("metrics", "consumers", count)
		time.Sleep(1 * time.Minute)
	}
}

// TODO handle TLS
func (ls *LogsService) RunGRPCServer(ctx context.Context, creds credentials.TransportCredentials) error {
	lis, err := net.Listen("tcp", ls.grpcAddress)
	if err != nil {
		return err
	}

	var opts []grpc.ServerOption
	if creds != nil {
		opts = append(opts, grpc.Creds(creds))
	}

	ls.grpcServer = grpc.NewServer(opts...)

	logsServer := NewLogsServer(ls.logsRepositoryFactory, ls.state).WithMessageTracing(ls.traceMessages)
	pb.RegisterLogsServiceServer(ls.grpcServer, logsServer)

	ls.log.Infow("starting grpc server", "address", ls.grpcAddress)
	return ls.grpcServer.Serve(lis)
}

// Shutdown gracefully stops the HTTP and gRPC servers
func (ls *LogsService) Shutdown(ctx context.Context) (err error) {
	if ls.httpServer != nil {
		err = ls.httpServer.Shutdown(ctx)
		if err != nil {
			return err
		}
	}

	if ls.grpcServer != nil {
		ls.grpcServer.GracefulStop()
	}

	// TODO decide how to handle graceful shutdown of consumers

	return nil
}

// WithHttpAddress sets the address of the HTTP health check server
func (ls *LogsService) WithHttpAddress(address string) *LogsService {
	ls.httpAddress = address
	return ls
}

// WithMessageTracing sets whether incoming messages should be traced
func (ls *LogsService) WithMessageTracing(enabled bool) *LogsService {
	ls.traceMessages = enabled
	return ls
}

// WithGrpcAddress sets the address of the gRPC server
func (ls *LogsService) WithGrpcAddress(address string) *LogsService {
	ls.grpcAddress = address
	return ls
}

// WithPauseInterval sets the wait time for messages to cool down before stopping
func (ls *LogsService) WithPauseInterval(duration time.Duration) *LogsService {
	ls.stopPauseInterval = duration
	return ls
}

// WithRandomPort binds the HTTP and gRPC servers to random localhost ports
func (ls *LogsService) WithRandomPort() *LogsService {
	port := rand.Intn(1000) + 17000
	ls.httpAddress = fmt.Sprintf("127.0.0.1:%d", port)
	port = rand.Intn(1000) + 18000
	ls.grpcAddress = fmt.Sprintf("127.0.0.1:%d", port)
	return ls
}

// WithLogsRepositoryFactory sets the factory used to build logs repositories
func (ls *LogsService) WithLogsRepositoryFactory(f repository.Factory) *LogsService {
	ls.logsRepositoryFactory = f
	return ls
}

// GrpcConnectionConfig contains GRPC connection parameters
type GrpcConnectionConfig struct {
	Secure       bool
	ClientAuth   bool
	CertFile     string
	KeyFile      string
	ClientCAFile string
}

// GetGrpcTransportCredentials returns transport credentials for GRPC connection config
func GetGrpcTransportCredentials(cfg GrpcConnectionConfig) (credentials.TransportCredentials, error) {
	var creds credentials.TransportCredentials

	if cfg.Secure {
		var tlsConfig tls.Config
		tlsConfig.ClientAuth = tls.NoClientCert
		if cfg.ClientAuth {
			tlsConfig.ClientAuth = tls.RequireAndVerifyClientCert
		}

		if cfg.CertFile != "" && cfg.KeyFile != "" {
			cert, err := tls.LoadX509KeyPair(cfg.CertFile, cfg.KeyFile)
			if err != nil {
				return nil, err
			}

			tlsConfig.Certificates = []tls.Certificate{cert}
		}

		if cfg.ClientCAFile != "" {
			caCertificate, err := os.ReadFile(cfg.ClientCAFile)
			if err != nil {
				return nil, err
			}

			certPool := x509.NewCertPool()
			if !certPool.AppendCertsFromPEM(caCertificate) {
				return nil, fmt.Errorf("failed to add client CA's certificate")
			}

			tlsConfig.ClientCAs = certPool
		}

		creds = credentials.NewTLS(&tlsConfig)
	}

	return creds, nil
}
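
// The sketch below is illustrative only (not part of the original source): it wires
// GetGrpcTransportCredentials into RunGRPCServer. The certificate paths are placeholders,
// not values used by Testkube.
func exampleServeTLS(ctx context.Context, ls *LogsService) error {
	creds, err := GetGrpcTransportCredentials(GrpcConnectionConfig{
		Secure:       true,
		ClientAuth:   true,
		CertFile:     "/certs/tls.crt",
		KeyFile:      "/certs/tls.key",
		ClientCAFile: "/certs/ca.crt",
	})
	if err != nil {
		return err
	}

	// creds is nil when Secure is false; RunGRPCServer then starts a plaintext server
	return ls.RunGRPCServer(ctx, creds)
}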