github.com/kubeshop/testkube@v1.17.23/pkg/logs/sidecar/proxy.go

package sidecar

import (
    "bufio"
    "context"
    "fmt"
    "io"
    "os"
    "os/signal"
    "strings"
    "syscall"
    "time"

    "github.com/nats-io/nats.go/jetstream"
    "github.com/pkg/errors"
    "go.uber.org/zap"
    corev1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/client-go/kubernetes"
    tcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"

    "github.com/kubeshop/testkube/pkg/executor"
    "github.com/kubeshop/testkube/pkg/logs/client"
    "github.com/kubeshop/testkube/pkg/logs/events"
    "github.com/kubeshop/testkube/pkg/utils"
)

var (
    ErrStopSignalReceived = errors.New("stop signal received")
)

const (
    pollInterval    = time.Second
    podStartTimeout = 30 * time.Minute
    logsBuffer      = 1000
)

func NewProxy(clientset kubernetes.Interface, podsClient tcorev1.PodInterface, logsStream client.Stream, js jetstream.JetStream, log *zap.SugaredLogger,
    namespace, executionId, source string) *Proxy {
    return &Proxy{
        log:         log.With("service", "logs-proxy", "namespace", namespace, "executionId", executionId),
        js:          js,
        clientset:   clientset,
        namespace:   namespace,
        executionId: executionId,
        podsClient:  podsClient,
        logsStream:  logsStream,
        source:      source,
    }
}

type Proxy struct {
    log         *zap.SugaredLogger
    js          jetstream.JetStream
    clientset   kubernetes.Interface
    namespace   string
    executionId string
    source      string
    podsClient  tcorev1.PodInterface
    logsStream  client.InitializedStreamPusher
}

func (p *Proxy) Run(ctx context.Context) error {
    sigs := make(chan os.Signal, 1)
    signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

    logs := make(chan *events.Log, logsBuffer)

    // create stream for incoming logs
    _, err := p.logsStream.Init(ctx, p.executionId)
    if err != nil {
        return err
    }

    go func() {
        p.log.Debugw("logs proxy stream started")
        err := p.streamLogs(ctx, logs)
        if err != nil {
            p.handleError(err, "logs proxy stream error")
        }
    }()

    for l := range logs {
        select {
        case sig := <-sigs:
            p.log.Warnw("logs proxy received signal, exiting", "signal", sig)
            p.handleError(ErrStopSignalReceived, "context cancelled stopping logs proxy")
            return ErrStopSignalReceived
        case <-ctx.Done():
            p.log.Warn("logs proxy context cancelled, exiting")
            return nil
        default:
            err = p.logsStream.Push(ctx, p.executionId, l)
            if err != nil {
                p.handleError(err, "error pushing logs to stream")
                return err
            }
        }
    }

    p.log.Infow("logs proxy sending completed")
    return nil
}
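// pushExampleLine is a hedged sketch, not part of the upstream file: it
// reduces the Init-then-Push sequence performed by Run to a single log line,
// assuming an already constructed stream client. The execution id is hypothetical.
func pushExampleLine(ctx context.Context, s client.InitializedStreamPusher) error {
    const executionId = "execution-123" // hypothetical id

    // Init must be called once per execution before any Push, as in Run.
    if _, err := s.Init(ctx, executionId); err != nil {
        return err
    }

    l := events.NewLogFromBytes([]byte("hello from the log sidecar")).WithSource(events.SourceJobPod)
    return s.Push(ctx, executionId, l)
}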
func (p *Proxy) streamLogs(ctx context.Context, logs chan *events.Log) (err error) {
    // close the channel on every exit path, so Run's range loop can finish
    // even when we fail before streaming any pod logs
    defer close(logs)

    pods, err := executor.GetJobPods(ctx, p.podsClient, p.executionId, 1, 10)
    if err != nil {
        p.handleError(err, "error getting job pods")
        return err
    }

    for _, pod := range pods.Items {
        l := p.log.With("namespace", pod.Namespace, "podName", pod.Name, "podStatus", pod.Status)

        switch pod.Status.Phase {
        case corev1.PodRunning:
            l.Debug("streaming pod logs: immediately")
            return p.streamLogsFromPod(pod, logs)

        case corev1.PodFailed:
            err := fmt.Errorf("can't get pod logs, pod failed: %s/%s", pod.Namespace, pod.Name)
            p.handleError(err, "streaming pod logs: pod failed")
            return err

        default:
            l.Debugw("streaming pod logs: waiting for pod to be ready")
            testFunc := p.isPodLoggable(pod.Name)
            if err = wait.PollUntilContextTimeout(ctx, pollInterval, podStartTimeout, true, testFunc); err != nil {
                // try to get pod container statuses from Waiting and Terminated states
                status := p.getPodContainerStatuses(pod)
                p.handleError(err, "can't get pod container status after pod failure")
                return errors.Wrap(err, status)
            }

            l.Debug("streaming pod logs: pod is loggable")
            return p.streamLogsFromPod(pod, logs)
        }
    }
    return
}

func (p *Proxy) streamLogsFromPod(pod corev1.Pod, logs chan *events.Log) (err error) {
    var containers []string
    for _, container := range pod.Spec.InitContainers {
        containers = append(containers, container.Name)
    }

    for _, container := range pod.Spec.Containers {
        containers = append(containers, container.Name)
    }

    for _, container := range containers {
        // Skip the log sidecar's own container: following its logs would
        // never finish, so the sidecar would run forever.
        if strings.HasSuffix(container, "-logs") {
            continue
        }
        req := p.podsClient.GetLogs(
            pod.Name,
            &corev1.PodLogOptions{
                Follow:     true,
                Timestamps: true,
                Container:  container,
            })

        stream, streamErr := req.Stream(context.Background())
        if streamErr != nil {
            p.handleError(streamErr, "error getting pod logs stream")
            return streamErr
        }

        reader := bufio.NewReader(stream)
        for {
            b, readErr := utils.ReadLongLine(reader)
            if readErr != nil {
                // EOF means the container finished; keep any other error
                // in the named return so it is reported below
                if !errors.Is(readErr, io.EOF) {
                    err = readErr
                }
                break
            }

            // parse log line - handle both the old format (output.Output)
            // and the new one (unstructured []byte)
            source := events.SourceJobPod
            if p.source != "" {
                source = p.source
            }

            logs <- events.NewLogFromBytes(b).WithSource(source)
        }

        // close the stream before moving on to the next container
        stream.Close()

        if err != nil {
            p.handleError(err, "error while reading pod logs stream")
            return err
        }
    }

    return
}
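// waitForLoggablePod is a hedged sketch, not part of the upstream file: it
// shows how streamLogs combines isPodLoggable with wait.PollUntilContextTimeout.
// The condition is re-evaluated every pollInterval (starting immediately) until
// it returns true or an error, or until podStartTimeout elapses. The pod name
// parameter is hypothetical.
func (p *Proxy) waitForLoggablePod(ctx context.Context, podName string) error {
    return wait.PollUntilContextTimeout(ctx, pollInterval, podStartTimeout, true, p.isPodLoggable(podName))
}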
// isPodLoggable checks if the pod can be logged through the Kubernetes API
func (p *Proxy) isPodLoggable(podName string) wait.ConditionWithContextFunc {
    namespace := p.namespace
    c := p.clientset

    return func(ctx context.Context) (bool, error) {
        pod, err := c.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
        if err != nil {
            return false, err
        }

        if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodRunning {
            return true, nil
        }

        if err = executor.IsPodFailed(pod); err != nil {
            return true, err
        }

        return false, nil
    }
}

// getPodContainerStatuses returns a string with container statuses in case the pod failed or timed out
func (p *Proxy) getPodContainerStatuses(pod corev1.Pod) (status string) {
    for _, s := range pod.Status.ContainerStatuses {
        if s.State.Terminated != nil {
            t := s.State.Terminated
            status += fmt.Sprintf("Pod container '%s' terminated: %s (exit code: %v, reason: %s, signal: %d); ", s.Name, t.Message, t.ExitCode, t.Reason, t.Signal)
        }

        if s.State.Waiting != nil {
            w := s.State.Waiting
            status += fmt.Sprintf("Pod container '%s' waiting: %s (reason: %s); ", s.Name, w.Message, w.Reason)
        }
    }

    return status
}

// handleError logs the error and pushes it as an error log chunk to the logs stream
func (p *Proxy) handleError(err error, title string) {
    if err != nil {
        p.log.Errorw(title, "error", err)
        err = p.logsStream.Push(context.Background(), p.executionId, events.NewErrorLog(err).WithSource("logs-proxy"))
        if err != nil {
            p.log.Errorw("error pushing error to stream", "title", title, "error", err)
        }
    }
}
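// runProxySidecar is a hedged usage sketch, not part of the upstream file: it
// shows how a caller might wire the proxy together, assuming the clientset,
// pods client, logs stream, JetStream connection and logger are already
// initialized elsewhere. The namespace and execution id values are hypothetical.
func runProxySidecar(ctx context.Context, clientset kubernetes.Interface, podsClient tcorev1.PodInterface,
    logsStream client.Stream, js jetstream.JetStream, log *zap.SugaredLogger) error {
    p := NewProxy(clientset, podsClient, logsStream, js, log, "testkube", "execution-123", "")

    // Run blocks until the pod logs are fully streamed, the context is
    // cancelled, or a SIGINT/SIGTERM is received.
    return p.Run(ctx)
}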