github.com/cilium/cilium@v1.16.2/pkg/hubble/parser/seven/parser.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Hubble
     3  
     4  package seven
     5  
     6  import (
     7  	"fmt"
     8  	"net/netip"
     9  	"sort"
    10  
    11  	lru "github.com/hashicorp/golang-lru/v2"
    12  	"github.com/sirupsen/logrus"
    13  	"google.golang.org/protobuf/types/known/timestamppb"
    14  	"google.golang.org/protobuf/types/known/wrapperspb"
    15  
    16  	flowpb "github.com/cilium/cilium/api/v1/flow"
    17  	"github.com/cilium/cilium/pkg/hubble/parser/errors"
    18  	"github.com/cilium/cilium/pkg/hubble/parser/getters"
    19  	"github.com/cilium/cilium/pkg/hubble/parser/options"
    20  	"github.com/cilium/cilium/pkg/k8s/utils"
    21  	"github.com/cilium/cilium/pkg/monitor/api"
    22  	"github.com/cilium/cilium/pkg/proxy/accesslog"
    23  	"github.com/cilium/cilium/pkg/time"
    24  	"github.com/cilium/cilium/pkg/u8proto"
    25  )
    26  
    27  // Parser is a parser for L7 payloads
    28  type Parser struct {
    29  	log               logrus.FieldLogger
    30  	timestampCache    *lru.Cache[string, time.Time]
    31  	traceContextCache *lru.Cache[string, *flowpb.TraceContext]
    32  	dnsGetter         getters.DNSGetter
    33  	ipGetter          getters.IPGetter
    34  	serviceGetter     getters.ServiceGetter
    35  	endpointGetter    getters.EndpointGetter
    36  	opts              *options.Options
    37  }
    38  
    39  // New returns a new L7 parser
    40  func New(
    41  	log logrus.FieldLogger,
    42  	dnsGetter getters.DNSGetter,
    43  	ipGetter getters.IPGetter,
    44  	serviceGetter getters.ServiceGetter,
    45  	endpointGetter getters.EndpointGetter,
    46  	opts ...options.Option,
    47  ) (*Parser, error) {
    48  	args := &options.Options{
    49  		CacheSize: 10000,
    50  		HubbleRedactSettings: options.HubbleRedactSettings{
    51  			Enabled:            false,
    52  			RedactHTTPUserInfo: true,
    53  			RedactHTTPQuery:    false,
    54  			RedactKafkaAPIKey:  false,
    55  			RedactHttpHeaders: options.HttpHeadersList{
    56  				Allow: map[string]struct{}{},
    57  				Deny:  map[string]struct{}{},
    58  			},
    59  		},
    60  	}
    61  
    62  	for _, opt := range opts {
    63  		opt(args)
    64  	}
    65  
    66  	timestampCache, err := lru.New[string, time.Time](args.CacheSize)
    67  	if err != nil {
    68  		return nil, fmt.Errorf("failed to initialize cache: %w", err)
    69  	}
    70  
    71  	traceIDCache, err := lru.New[string, *flowpb.TraceContext](args.CacheSize)
    72  	if err != nil {
    73  		return nil, fmt.Errorf("failed to initialize cache: %w", err)
    74  	}
    75  
    76  	return &Parser{
    77  		log:               log,
    78  		timestampCache:    timestampCache,
    79  		traceContextCache: traceIDCache,
    80  		dnsGetter:         dnsGetter,
    81  		ipGetter:          ipGetter,
    82  		serviceGetter:     serviceGetter,
    83  		endpointGetter:    endpointGetter,
    84  		opts:              args,
    85  	}, nil
    86  }
    87  
    88  // Decode decodes the data from 'payload' into 'decoded'
    89  func (p *Parser) Decode(r *accesslog.LogRecord, decoded *flowpb.Flow) error {
    90  	// Safety: This function and all the helpers it invokes are not allowed to
    91  	// mutate r in any way. We only have read access to the LogRecord, as it
    92  	// may be shared with other consumers
    93  	if r == nil {
    94  		return errors.ErrEmptyData
    95  	}
    96  
    97  	timestamp, pbTimestamp, err := decodeTime(r.Timestamp)
    98  	if err != nil {
    99  		return err
   100  	}
   101  
   102  	ip := decodeIP(r.IPVersion, r.SourceEndpoint, r.DestinationEndpoint)
   103  
   104  	// Ignore IP parsing errors as IPs can be empty. Getters will handle invalid values.
   105  	// Flows with empty IPs have been observed in practice, but it was not clear what kind of flows
   106  	// those are - errors handling here should be revisited once it's clear.
   107  	sourceIP, _ := netip.ParseAddr(ip.Source)
   108  	destinationIP, _ := netip.ParseAddr(ip.Destination)
   109  	var sourceNames, destinationNames []string
   110  	var sourceNamespace, sourcePod, destinationNamespace, destinationPod string
   111  	if p.dnsGetter != nil {
   112  		sourceNames = p.dnsGetter.GetNamesOf(uint32(r.DestinationEndpoint.ID), sourceIP)
   113  		destinationNames = p.dnsGetter.GetNamesOf(uint32(r.SourceEndpoint.ID), destinationIP)
   114  	}
   115  	if p.ipGetter != nil {
   116  		if meta := p.ipGetter.GetK8sMetadata(sourceIP); meta != nil {
   117  			sourceNamespace, sourcePod = meta.Namespace, meta.PodName
   118  		}
   119  		if meta := p.ipGetter.GetK8sMetadata(destinationIP); meta != nil {
   120  			destinationNamespace, destinationPod = meta.Namespace, meta.PodName
   121  		}
   122  	}
   123  	srcEndpoint := decodeEndpoint(r.SourceEndpoint, sourceNamespace, sourcePod)
   124  	dstEndpoint := decodeEndpoint(r.DestinationEndpoint, destinationNamespace, destinationPod)
   125  
   126  	if p.endpointGetter != nil {
   127  		p.updateEndpointWorkloads(sourceIP, srcEndpoint)
   128  		p.updateEndpointWorkloads(destinationIP, dstEndpoint)
   129  	}
   130  
   131  	l4, sourcePort, destinationPort := decodeLayer4(r.TransportProtocol, r.SourceEndpoint, r.DestinationEndpoint)
   132  	var sourceService, destinationService *flowpb.Service
   133  	if p.serviceGetter != nil {
   134  		sourceService = p.serviceGetter.GetServiceByAddr(sourceIP, sourcePort)
   135  		destinationService = p.serviceGetter.GetServiceByAddr(destinationIP, destinationPort)
   136  	}
   137  
   138  	decoded.Time = pbTimestamp
   139  	decoded.Verdict = decodeVerdict(r.Verdict)
   140  	decoded.DropReason = 0
   141  	decoded.DropReasonDesc = flowpb.DropReason_DROP_REASON_UNKNOWN
   142  	decoded.IP = ip
   143  	decoded.L4 = l4
   144  	decoded.Source = srcEndpoint
   145  	decoded.Destination = dstEndpoint
   146  	decoded.Type = flowpb.FlowType_L7
   147  	decoded.SourceNames = sourceNames
   148  	decoded.DestinationNames = destinationNames
   149  	decoded.L7 = decodeLayer7(r, p.opts)
   150  	decoded.L7.LatencyNs = p.computeResponseTime(r, timestamp)
   151  	decoded.IsReply = decodeIsReply(r.Type)
   152  	decoded.Reply = decoded.GetIsReply().GetValue()
   153  	decoded.EventType = decodeCiliumEventType(api.MessageTypeAccessLog)
   154  	decoded.SourceService = sourceService
   155  	decoded.DestinationService = destinationService
   156  	decoded.TrafficDirection = decodeTrafficDirection(r.ObservationPoint)
   157  	decoded.PolicyMatchType = 0
   158  	decoded.TraceContext = p.getTraceContext(r)
   159  	decoded.Summary = p.getSummary(r, decoded)
   160  
   161  	return nil
   162  }
   163  
   164  func extractRequestID(r *accesslog.LogRecord) string {
   165  	var requestID string
   166  	if r.HTTP != nil {
   167  		requestID = r.HTTP.Headers.Get("X-Request-Id")
   168  	}
   169  	return requestID
   170  }
   171  
   172  func (p *Parser) getTraceContext(r *accesslog.LogRecord) *flowpb.TraceContext {
   173  	requestID := extractRequestID(r)
   174  	switch r.Type {
   175  	case accesslog.TypeRequest:
   176  		traceContext := extractTraceContext(r)
   177  		if traceContext == nil {
   178  			break
   179  		}
   180  		// Envoy should add a requestID to all requests it's managing, but  if it's
   181  		// missing for some reason, don't add to the cache without a requestID.
   182  		if requestID != "" {
   183  			p.traceContextCache.Add(requestID, traceContext)
   184  		}
   185  		return traceContext
   186  	case accesslog.TypeResponse:
   187  		if requestID == "" {
   188  			return nil
   189  		}
   190  		traceContext, ok := p.traceContextCache.Get(requestID)
   191  		if !ok {
   192  			break
   193  		}
   194  		p.traceContextCache.Remove(requestID)
   195  		return traceContext
   196  	}
   197  	return nil
   198  }
   199  
   200  func (p *Parser) computeResponseTime(r *accesslog.LogRecord, timestamp time.Time) uint64 {
   201  	requestID := extractRequestID(r)
   202  	if requestID == "" {
   203  		return 0
   204  	}
   205  	switch r.Type {
   206  	case accesslog.TypeRequest:
   207  		p.timestampCache.Add(requestID, timestamp)
   208  	case accesslog.TypeResponse:
   209  		requestTimestamp, ok := p.timestampCache.Get(requestID)
   210  		if !ok {
   211  			return 0
   212  		}
   213  		p.timestampCache.Remove(requestID)
   214  		latency := timestamp.Sub(requestTimestamp).Nanoseconds()
   215  		if latency < 0 {
   216  			return 0
   217  		}
   218  		return uint64(latency)
   219  	}
   220  
   221  	return 0
   222  }
   223  
   224  func (p *Parser) updateEndpointWorkloads(ip netip.Addr, endpoint *flowpb.Endpoint) {
   225  	if ep, ok := p.endpointGetter.GetEndpointInfo(ip); ok {
   226  		if pod := ep.GetPod(); pod != nil {
   227  			workload, workloadTypeMeta, ok := utils.GetWorkloadMetaFromPod(pod)
   228  			if ok {
   229  				endpoint.Workloads = []*flowpb.Workload{{Kind: workloadTypeMeta.Kind, Name: workload.Name}}
   230  			}
   231  		}
   232  	}
   233  }
   234  
   235  func decodeTime(timestamp string) (goTime time.Time, pbTime *timestamppb.Timestamp, err error) {
   236  	goTime, err = time.Parse(time.RFC3339Nano, timestamp)
   237  	if err != nil {
   238  		return
   239  	}
   240  
   241  	pbTime = timestamppb.New(goTime)
   242  	err = pbTime.CheckValid()
   243  	return
   244  }
   245  
   246  func decodeVerdict(verdict accesslog.FlowVerdict) flowpb.Verdict {
   247  	switch verdict {
   248  	case accesslog.VerdictDenied:
   249  		return flowpb.Verdict_DROPPED
   250  	case accesslog.VerdictForwarded:
   251  		return flowpb.Verdict_FORWARDED
   252  	case accesslog.VerdictRedirected:
   253  		return flowpb.Verdict_REDIRECTED
   254  	case accesslog.VerdictError:
   255  		return flowpb.Verdict_ERROR
   256  	default:
   257  		return flowpb.Verdict_VERDICT_UNKNOWN
   258  	}
   259  }
   260  
   261  func decodeTrafficDirection(direction accesslog.ObservationPoint) flowpb.TrafficDirection {
   262  	switch direction {
   263  	case accesslog.Ingress:
   264  		return flowpb.TrafficDirection_INGRESS
   265  	case accesslog.Egress:
   266  		return flowpb.TrafficDirection_EGRESS
   267  	default:
   268  		return flowpb.TrafficDirection_TRAFFIC_DIRECTION_UNKNOWN
   269  	}
   270  }
   271  
   272  func decodeIP(version accesslog.IPVersion, source, destination accesslog.EndpointInfo) *flowpb.IP {
   273  	switch version {
   274  	case accesslog.VersionIPv4:
   275  		return &flowpb.IP{
   276  			Source:      source.IPv4,
   277  			Destination: destination.IPv4,
   278  			IpVersion:   flowpb.IPVersion_IPv4,
   279  		}
   280  	case accesslog.VersionIPV6:
   281  		return &flowpb.IP{
   282  			Source:      source.IPv6,
   283  			Destination: destination.IPv6,
   284  			IpVersion:   flowpb.IPVersion_IPv6,
   285  		}
   286  	default:
   287  		return nil
   288  	}
   289  }
   290  
   291  func decodeLayer4(protocol accesslog.TransportProtocol, source, destination accesslog.EndpointInfo) (l4 *flowpb.Layer4, srcPort, dstPort uint16) {
   292  	switch u8proto.U8proto(protocol) {
   293  	case u8proto.TCP:
   294  		return &flowpb.Layer4{
   295  			Protocol: &flowpb.Layer4_TCP{
   296  				TCP: &flowpb.TCP{
   297  					SourcePort:      uint32(source.Port),
   298  					DestinationPort: uint32(destination.Port),
   299  				},
   300  			},
   301  		}, uint16(source.Port), uint16(destination.Port)
   302  	case u8proto.UDP:
   303  		return &flowpb.Layer4{
   304  			Protocol: &flowpb.Layer4_UDP{
   305  				UDP: &flowpb.UDP{
   306  					SourcePort:      uint32(source.Port),
   307  					DestinationPort: uint32(destination.Port),
   308  				},
   309  			},
   310  		}, uint16(source.Port), uint16(destination.Port)
   311  	case u8proto.SCTP:
   312  		return &flowpb.Layer4{
   313  			Protocol: &flowpb.Layer4_SCTP{
   314  				SCTP: &flowpb.SCTP{
   315  					SourcePort:      uint32(source.Port),
   316  					DestinationPort: uint32(destination.Port),
   317  				},
   318  			},
   319  		}, uint16(source.Port), uint16(destination.Port)
   320  	default:
   321  		return nil, 0, 0
   322  	}
   323  }
   324  
   325  func decodeEndpoint(endpoint accesslog.EndpointInfo, namespace, podName string) *flowpb.Endpoint {
   326  	labels := endpoint.Labels.GetModel()
   327  	sort.Strings(labels)
   328  	return &flowpb.Endpoint{
   329  		ID:        uint32(endpoint.ID),
   330  		Identity:  uint32(endpoint.Identity),
   331  		Namespace: namespace,
   332  		Labels:    labels,
   333  		PodName:   podName,
   334  	}
   335  }
   336  
   337  func decodeLayer7(r *accesslog.LogRecord, opts *options.Options) *flowpb.Layer7 {
   338  	var flowType flowpb.L7FlowType
   339  	switch r.Type {
   340  	case accesslog.TypeRequest:
   341  		flowType = flowpb.L7FlowType_REQUEST
   342  	case accesslog.TypeResponse:
   343  		flowType = flowpb.L7FlowType_RESPONSE
   344  	case accesslog.TypeSample:
   345  		flowType = flowpb.L7FlowType_SAMPLE
   346  	}
   347  
   348  	switch {
   349  	case r.DNS != nil:
   350  		return &flowpb.Layer7{
   351  			Type:   flowType,
   352  			Record: decodeDNS(r.Type, r.DNS),
   353  		}
   354  	case r.HTTP != nil:
   355  		return &flowpb.Layer7{
   356  			Type:   flowType,
   357  			Record: decodeHTTP(r.Type, r.HTTP, opts),
   358  		}
   359  	case r.Kafka != nil:
   360  		return &flowpb.Layer7{
   361  			Type:   flowType,
   362  			Record: decodeKafka(r.Type, r.Kafka, opts),
   363  		}
   364  	default:
   365  		return &flowpb.Layer7{
   366  			Type: flowType,
   367  		}
   368  	}
   369  }
   370  
   371  func decodeIsReply(t accesslog.FlowType) *wrapperspb.BoolValue {
   372  	return &wrapperspb.BoolValue{
   373  		Value: t == accesslog.TypeResponse,
   374  	}
   375  }
   376  
   377  func decodeCiliumEventType(eventType uint8) *flowpb.CiliumEventType {
   378  	return &flowpb.CiliumEventType{
   379  		Type: int32(eventType),
   380  	}
   381  }
   382  
   383  func genericSummary(l7 *accesslog.LogRecordL7) string {
   384  	return fmt.Sprintf("%s Fields: %s", l7.Proto, l7.Fields)
   385  }
   386  
   387  func (p *Parser) getSummary(logRecord *accesslog.LogRecord, flow *flowpb.Flow) string {
   388  	if logRecord == nil {
   389  		return ""
   390  	}
   391  	if http := logRecord.HTTP; http != nil {
   392  		return p.httpSummary(logRecord.Type, http, flow)
   393  	} else if kafka := logRecord.Kafka; kafka != nil {
   394  		return kafkaSummary(flow)
   395  	} else if dns := logRecord.DNS; dns != nil {
   396  		return dnsSummary(logRecord.Type, dns)
   397  	} else if generic := logRecord.L7; generic != nil {
   398  		return genericSummary(generic)
   399  	}
   400  
   401  	return ""
   402  }