github.com/kubeshop/testkube@v1.17.23/pkg/logs/service.go

// TODO how to handle pod issues here?
// TODO how to know that there is a topic which is not handled by any subscriber?
// TODO we would need to check pending log topics and handle them after restart in case of a log pod disaster

package logs

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"fmt"
	"math/rand"
	"net"
	"net/http"
	"os"
	"sync"
	"time"

	"github.com/nats-io/nats.go"
	"github.com/nats-io/nats.go/jetstream"
	"go.uber.org/zap"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"

	"github.com/kubeshop/testkube/pkg/log"
	"github.com/kubeshop/testkube/pkg/logs/adapter"
	"github.com/kubeshop/testkube/pkg/logs/client"
	"github.com/kubeshop/testkube/pkg/logs/pb"
	"github.com/kubeshop/testkube/pkg/logs/repository"
	"github.com/kubeshop/testkube/pkg/logs/state"
)

const (
	DefaultHttpAddress = ":8080"
	DefaultGrpcAddress = ":9090"

	defaultStopPauseInterval = 200 * time.Millisecond
)

func NewLogsService(nats *nats.Conn, js jetstream.JetStream, state state.Interface, stream client.Stream) *LogsService {
	return &LogsService{
		nats:              nats,
		adapters:          []adapter.Adapter{},
		js:                js,
		log:               log.DefaultLogger.With("service", "logs-service"),
		Ready:             make(chan struct{}, 1),
		httpAddress:       DefaultHttpAddress,
		grpcAddress:       DefaultGrpcAddress,
		consumerInstances: sync.Map{},
		state:             state,
		stopPauseInterval: defaultStopPauseInterval,
		logStream:         stream,
	}
}

type LogsService struct {
	logsRepositoryFactory repository.Factory
	log                   *zap.SugaredLogger
	nats                  *nats.Conn
	js                    jetstream.JetStream
	adapters              []adapter.Adapter

	// logStream manages and sends data to logs streams
	logStream client.Stream

	Ready chan struct{}

	// grpcAddress is the address for the grpc server
	grpcAddress string
	// grpcServer is the grpc server for the logs service
	grpcServer *grpc.Server

	// httpAddress is the address for the Kubernetes http health check handler
	httpAddress string
	// httpServer is the http server for the health check (for Kubernetes below 1.25)
	httpServer *http.Server

	// consumerInstances is an internal executionID => Consumer map which we need to clean up;
	// each pod can have a different set of executionId consumers
	consumerInstances sync.Map

	// state manager for keeping logs state (pending, finished);
	// allows distinguishing where to load data from in OSS,
	// cloud will always load them locally
	state state.Interface

	// stopPauseInterval is the wait time on stop for messages to cool down
	stopPauseInterval time.Duration

	// traceMessages enables tracing of incoming messages
	traceMessages bool
}
// AddAdapter adds a new adapter to the logs service; adapters are configured based on the given mode,
// e.g. cloud mode gets the cloud adapter to store logs directly in the cloud
func (ls *LogsService) AddAdapter(a adapter.Adapter) {
	ls.adapters = append(ls.adapters, a)
}

func (ls *LogsService) Run(ctx context.Context) (err error) {
	// Handle start and stop events from nats,
	// assuming that after a start event something is pushing data to the stream;
	// it can be our handler or some other service
	go ls.metrics()

	// For a start event we must build the stream for the given execution id and start consuming it.
	// This must follow a queue group so that each pod gets its own subset of executions to handle
	// (see the illustrative sketch after this function).
	// The start event will be triggered by the logs process controller (scheduler);
	// group is the common name for both start and stop subjects
	for group, subject := range StartSubjects {
		ls.nats.QueueSubscribe(subject, StartQueue, ls.handleStart(ctx, group))
	}

	// Listen on all pods as we don't control which one will have a given consumer;
	// the stop event will be triggered by the logs process controller (scheduler)
	for group, subject := range StopSubjects {
		ls.nats.Subscribe(subject, ls.handleStop(ctx, group))
	}

	// Send ready signal
	ls.Ready <- struct{}{}

	// block main routine
	<-ctx.Done()

	return nil
}
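// natsSubscribePatternSketch is an illustrative sketch (not part of the original file) of the
// subscription pattern used in Run above: start events use a queue group so each member of the
// group handles only its share of executions, while stop events use plain subscriptions so every
// pod sees them (we don't know which pod holds the consumers for a given execution).
// The subject and queue names here are placeholders, not the real Testkube subjects.
func natsSubscribePatternSketch(nc *nats.Conn) error {
	// Load-balanced: only one member of the "logs-start" queue group receives each start event.
	if _, err := nc.QueueSubscribe("events.logs.start", "logs-start", func(m *nats.Msg) {
		// start consuming logs for the execution referenced in m.Data
	}); err != nil {
		return err
	}

	// Fan-out: every subscriber receives each stop event.
	_, err := nc.Subscribe("events.logs.stop", func(m *nats.Msg) {
		// stop consumers for the execution referenced in m.Data, if this pod owns them
	})
	return err
}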
func (ls *LogsService) metrics() {
	for {
		count := 0
		ls.consumerInstances.Range(func(_, _ interface{}) bool {
			count++
			return true
		})
		ls.log.Infow("metrics", "consumers", count)
		time.Sleep(1 * time.Minute)
	}
}

// TODO handle TLS
func (ls *LogsService) RunGRPCServer(ctx context.Context, creds credentials.TransportCredentials) error {
	lis, err := net.Listen("tcp", ls.grpcAddress)
	if err != nil {
		return err
	}

	var opts []grpc.ServerOption
	if creds != nil {
		opts = append(opts, grpc.Creds(creds))
	}

	ls.grpcServer = grpc.NewServer(opts...)

	logsServer := NewLogsServer(ls.logsRepositoryFactory, ls.state).WithMessageTracing(ls.traceMessages)
	pb.RegisterLogsServiceServer(ls.grpcServer, logsServer)

	ls.log.Infow("starting grpc server", "address", ls.grpcAddress)
	return ls.grpcServer.Serve(lis)
}

func (ls *LogsService) Shutdown(ctx context.Context) (err error) {
	err = ls.httpServer.Shutdown(ctx)
	if err != nil {
		return err
	}

	if ls.grpcServer != nil {
		ls.grpcServer.GracefulStop()
	}

	// TODO decide how to handle graceful shutdown of consumers

	return nil
}

func (ls *LogsService) WithHttpAddress(address string) *LogsService {
	ls.httpAddress = address
	return ls
}

func (ls *LogsService) WithMessageTracing(enabled bool) *LogsService {
	ls.traceMessages = enabled
	return ls
}

func (ls *LogsService) WithGrpcAddress(address string) *LogsService {
	ls.grpcAddress = address
	return ls
}

func (ls *LogsService) WithPauseInterval(duration time.Duration) *LogsService {
	ls.stopPauseInterval = duration
	return ls
}

func (ls *LogsService) WithRandomPort() *LogsService {
	port := rand.Intn(1000) + 17000
	ls.httpAddress = fmt.Sprintf("127.0.0.1:%d", port)
	port = rand.Intn(1000) + 18000
	ls.grpcAddress = fmt.Sprintf("127.0.0.1:%d", port)
	return ls
}

func (ls *LogsService) WithLogsRepositoryFactory(f repository.Factory) *LogsService {
	ls.logsRepositoryFactory = f
	return ls
}

// GrpcConnectionConfig contains GRPC connection parameters
type GrpcConnectionConfig struct {
	Secure       bool
	ClientAuth   bool
	CertFile     string
	KeyFile      string
	ClientCAFile string
}

// GetGrpcTransportCredentials returns transport credentials for GRPC connection config
func GetGrpcTransportCredentials(cfg GrpcConnectionConfig) (credentials.TransportCredentials, error) {
	var creds credentials.TransportCredentials

	if cfg.Secure {
		var tlsConfig tls.Config
		tlsConfig.ClientAuth = tls.NoClientCert
		if cfg.ClientAuth {
			tlsConfig.ClientAuth = tls.RequireAndVerifyClientCert
		}

		if cfg.CertFile != "" && cfg.KeyFile != "" {
			cert, err := tls.LoadX509KeyPair(cfg.CertFile, cfg.KeyFile)
			if err != nil {
				return nil, err
			}

			tlsConfig.Certificates = []tls.Certificate{cert}
		}

		if cfg.ClientCAFile != "" {
			caCertificate, err := os.ReadFile(cfg.ClientCAFile)
			if err != nil {
				return nil, err
			}

			certPool := x509.NewCertPool()
			if !certPool.AppendCertsFromPEM(caCertificate) {
				return nil, fmt.Errorf("failed to add client CA's certificate")
			}

			tlsConfig.ClientCAs = certPool
		}

		creds = credentials.NewTLS(&tlsConfig)
	}

	return creds, nil
}
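// runLogsServiceExample is an illustrative sketch (not part of the original file) showing how the
// builder methods above and GetGrpcTransportCredentials could be wired together by a caller.
// The NATS connection, JetStream context, state manager and log stream are assumed to be created
// elsewhere; the certificate paths are hypothetical.
func runLogsServiceExample(ctx context.Context, nc *nats.Conn, js jetstream.JetStream, st state.Interface, stream client.Stream) error {
	// Build server-side TLS credentials from files (with Secure set to false this returns nil
	// credentials, which RunGRPCServer accepts and then serves without TLS).
	creds, err := GetGrpcTransportCredentials(GrpcConnectionConfig{
		Secure:   true,
		CertFile: "/certs/tls.crt",
		KeyFile:  "/certs/tls.key",
	})
	if err != nil {
		return err
	}

	svc := NewLogsService(nc, js, st, stream).
		WithGrpcAddress(DefaultGrpcAddress).
		WithMessageTracing(true)

	// Run consumes start/stop events from NATS and blocks until the context is cancelled.
	go func() {
		if err := svc.Run(ctx); err != nil {
			svc.log.Errorw("logs service error", "error", err)
		}
	}()

	// Serve the gRPC log-streaming API; blocks until the listener is closed.
	return svc.RunGRPCServer(ctx, creds)
}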