github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/logql/log/pipeline.go (about)

     1  package log
     2  
     3  import (
     4  	"reflect"
     5  	"unsafe"
     6  
     7  	"github.com/prometheus/prometheus/model/labels"
     8  )
     9  
// NoopStage is a Stage that doesn't process a log line: its Process method
// returns the line it was given and always reports a match.
var NoopStage Stage = &noopStage{}
    12  
// Pipeline creates a StreamPipeline for each log stream.
type Pipeline interface {
	// ForStream returns the StreamPipeline to use for the stream identified
	// by the given labels.
	ForStream(labels labels.Labels) StreamPipeline
}
    17  
// StreamPipeline transforms and filters log lines and labels.
// A StreamPipeline never mutates the received line.
type StreamPipeline interface {
	// BaseLabels returns the LabelsResult the stream pipeline holds for its
	// stream, independently of any particular log line.
	BaseLabels() LabelsResult
	// Process processes a log line and returns the transformed line and the labels.
	// The buffer returned for the log line can be reused on subsequent calls to Process and therefore must be copied.
	// matches is false when the line has been filtered out.
	Process(ts int64, line []byte) (resultLine []byte, resultLabels LabelsResult, matches bool)
	// ProcessString is the string counterpart of Process: it takes the line
	// as a string and returns the resulting line as a string.
	ProcessString(ts int64, line string) (resultLine string, resultLabels LabelsResult, matches bool)
}
    27  
// Stage is a single step of a Pipeline.
// A Stage implementation should never mutate the line passed, but instead either
// return the line unchanged or allocate a new line.
type Stage interface {
	// Process handles one log line with timestamp ts, using lbs to read or
	// modify labels. It returns the resulting line and whether the line is
	// kept; pipelines in this file stop at the first stage returning false.
	Process(ts int64, line []byte, lbs *LabelsBuilder) ([]byte, bool)
	// RequiredLabelNames returns label names associated with the stage.
	// NOTE(review): presumably the labels the stage needs to read; confirm
	// against the callers, which are not in this file.
	RequiredLabelNames() []string
}
    35  
    36  // NewNoopPipeline creates a pipelines that does not process anything and returns log streams as is.
    37  func NewNoopPipeline() Pipeline {
    38  	return &noopPipeline{
    39  		cache: map[uint64]*noopStreamPipeline{},
    40  	}
    41  }
    42  
// noopPipeline is the Pipeline implementation returned by NewNoopPipeline.
type noopPipeline struct {
	// cache holds the stream pipelines already handed out by ForStream,
	// keyed by the hash of the stream labels.
	cache map[uint64]*noopStreamPipeline
}
    46  
    47  // IsNoopPipeline tells if a pipeline is a Noop.
    48  func IsNoopPipeline(p Pipeline) bool {
    49  	_, ok := p.(*noopPipeline)
    50  	return ok
    51  }
    52  
// noopStreamPipeline is the StreamPipeline handed out by noopPipeline.
// It embeds the LabelsResult of its stream and returns it with every line.
type noopStreamPipeline struct {
	LabelsResult
}
    56  
// Process returns the line untouched together with the stream's
// LabelsResult, and always reports a match. The timestamp is ignored.
func (n noopStreamPipeline) Process(_ int64, line []byte) ([]byte, LabelsResult, bool) {
	return line, n.LabelsResult, true
}
    60  
// ProcessString returns the line untouched together with the stream's
// LabelsResult, and always reports a match. The timestamp is ignored.
func (n noopStreamPipeline) ProcessString(_ int64, line string) (string, LabelsResult, bool) {
	return line, n.LabelsResult, true
}
    64  
// BaseLabels returns the embedded LabelsResult of the stream.
func (n noopStreamPipeline) BaseLabels() LabelsResult { return n.LabelsResult }
    66  
    67  func (n *noopPipeline) ForStream(labels labels.Labels) StreamPipeline {
    68  	h := labels.Hash()
    69  	if cached, ok := n.cache[h]; ok {
    70  		return cached
    71  	}
    72  	sp := &noopStreamPipeline{LabelsResult: NewLabelsResult(labels, h)}
    73  	n.cache[h] = sp
    74  	return sp
    75  }
    76  
// noopStage is a Stage that leaves every line unchanged; NoopStage is an
// instance of it.
type noopStage struct{}
    78  
// Process returns the line unchanged and always reports a match. The
// timestamp and the labels builder are ignored.
func (noopStage) Process(_ int64, line []byte, _ *LabelsBuilder) ([]byte, bool) {
	return line, true
}
// RequiredLabelNames returns an empty, non-nil slice.
func (noopStage) RequiredLabelNames() []string { return []string{} }
    83  
// StageFunc adapts a plain function, together with a list of required label
// names, into a Stage.
type StageFunc struct {
	// process is the function invoked by Process; it must be non-nil for
	// Process to be callable.
	process func(ts int64, line []byte, lbs *LabelsBuilder) ([]byte, bool)
	// requiredLabels is what RequiredLabelNames reports; it may be nil.
	requiredLabels []string
}
    88  
// Process calls the wrapped function with the given arguments and returns
// its results unchanged.
func (fn StageFunc) Process(ts int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
	return fn.process(ts, line, lbs)
}
    92  
    93  func (fn StageFunc) RequiredLabelNames() []string {
    94  	if fn.requiredLabels == nil {
    95  		return []string{}
    96  	}
    97  	return fn.requiredLabels
    98  }
    99  
// pipeline is a combination of multiple stages.
// It can also be reduced into a single stage for convenience.
type pipeline struct {
	// stages are shared by every stream pipeline created by ForStream.
	stages []Stage
	// baseBuilder computes the stream hash and creates the per-stream
	// LabelsBuilder.
	baseBuilder *BaseLabelsBuilder

	// streamPipelines caches the stream pipelines created by ForStream,
	// keyed by the hash returned by baseBuilder.Hash.
	streamPipelines map[uint64]StreamPipeline
}
   108  
   109  // NewPipeline creates a new pipeline for a given set of stages.
   110  func NewPipeline(stages []Stage) Pipeline {
   111  	if len(stages) == 0 {
   112  		return NewNoopPipeline()
   113  	}
   114  	return &pipeline{
   115  		stages:          stages,
   116  		baseBuilder:     NewBaseLabelsBuilder(),
   117  		streamPipelines: make(map[uint64]StreamPipeline),
   118  	}
   119  }
   120  
// streamPipeline is the StreamPipeline created by pipeline.ForStream for a
// single stream.
type streamPipeline struct {
	// stages are run in order on every line.
	stages []Stage
	// builder is the labels builder for this stream; it is reset before each
	// line is processed.
	builder *LabelsBuilder
}
   125  
   126  func (p *pipeline) ForStream(labels labels.Labels) StreamPipeline {
   127  	hash := p.baseBuilder.Hash(labels)
   128  	if res, ok := p.streamPipelines[hash]; ok {
   129  		return res
   130  	}
   131  
   132  	res := &streamPipeline{
   133  		stages:  p.stages,
   134  		builder: p.baseBuilder.ForLabels(labels, hash),
   135  	}
   136  	p.streamPipelines[hash] = res
   137  	return res
   138  }
   139  
   140  func (p *streamPipeline) Process(ts int64, line []byte) ([]byte, LabelsResult, bool) {
   141  	var ok bool
   142  	p.builder.Reset()
   143  	for _, s := range p.stages {
   144  		line, ok = s.Process(ts, line, p.builder)
   145  		if !ok {
   146  			return nil, nil, false
   147  		}
   148  	}
   149  	return line, p.builder.LabelsResult(), true
   150  }
   151  
   152  func (p *streamPipeline) ProcessString(ts int64, line string) (string, LabelsResult, bool) {
   153  	// Stages only read from the line.
   154  	lb, lr, ok := p.Process(ts, unsafeGetBytes(line))
   155  	// but the returned line needs to be copied.
   156  	return string(lb), lr, ok
   157  }
   158  
// BaseLabels returns the builder's currentResult.
// NOTE(review): presumably the labels result of the unprocessed stream;
// confirm against LabelsBuilder, which is defined elsewhere.
func (p *streamPipeline) BaseLabels() LabelsResult { return p.builder.currentResult }
   160  
// PipelineFilter contains a set of matchers and a pipeline that, when matched,
// causes an entry from a log stream to be skipped. Matching entries must also
// fall between 'start' and 'end', inclusive.
type PipelineFilter struct {
	// Start and End bound the entry timestamps the filter applies to; both
	// ends are inclusive.
	Start int64
	End   int64
	// Matchers must all match the stream labels for the filter to apply to
	// a stream.
	Matchers []*labels.Matcher
	// Pipeline decides, per entry, whether the entry matches the filter.
	Pipeline Pipeline
}
   170  
   171  // NewFilteringPipeline creates a pipeline where entries from the underlying
   172  // log stream are filtered by pipeline filters before being passed to the
   173  // pipeline representing the queried data. Filters are always upstream of the
   174  // pipeline
   175  func NewFilteringPipeline(f []PipelineFilter, p Pipeline) Pipeline {
   176  	return &filteringPipeline{
   177  		filters:  f,
   178  		pipeline: p,
   179  	}
   180  }
   181  
// filteringPipeline is the Pipeline returned by NewFilteringPipeline.
type filteringPipeline struct {
	// filters are the candidate filters; ForStream keeps those whose
	// matchers all match the stream labels.
	filters []PipelineFilter
	// pipeline is the wrapped pipeline that processes entries not dropped
	// by a filter.
	pipeline Pipeline
}
   186  
   187  func (p *filteringPipeline) ForStream(labels labels.Labels) StreamPipeline {
   188  	var streamFilters []streamFilter
   189  	for _, f := range p.filters {
   190  		if allMatch(f.Matchers, labels) {
   191  			streamFilters = append(streamFilters, streamFilter{
   192  				start:    f.Start,
   193  				end:      f.End,
   194  				pipeline: f.Pipeline.ForStream(labels),
   195  			})
   196  		}
   197  	}
   198  
   199  	return &filteringStreamPipeline{
   200  		filters:  streamFilters,
   201  		pipeline: p.pipeline.ForStream(labels),
   202  	}
   203  }
   204  
   205  func allMatch(matchers []*labels.Matcher, labels labels.Labels) bool {
   206  	for _, m := range matchers {
   207  		if !m.Matches(labels.Get(m.Name)) {
   208  			return false
   209  		}
   210  	}
   211  	return true
   212  }
   213  
// streamFilter is a PipelineFilter bound to one stream: its time window
// plus the filter's stream pipeline for that stream.
type streamFilter struct {
	// start and end bound the timestamps the filter applies to (inclusive).
	start int64
	end   int64
	// pipeline reports whether an entry matches the filter.
	pipeline StreamPipeline
}
   219  
// filteringStreamPipeline is the StreamPipeline created by
// filteringPipeline.ForStream. Entries matched by one of its filters are
// dropped; the others are handed to the wrapped pipeline.
type filteringStreamPipeline struct {
	// filters are the filters that apply to this stream.
	filters []streamFilter
	// pipeline processes the entries that no filter dropped.
	pipeline StreamPipeline
}
   224  
// BaseLabels returns the base labels of the wrapped stream pipeline.
func (sp *filteringStreamPipeline) BaseLabels() LabelsResult {
	return sp.pipeline.BaseLabels()
}
   228  
   229  func (sp *filteringStreamPipeline) Process(ts int64, line []byte) ([]byte, LabelsResult, bool) {
   230  	for _, filter := range sp.filters {
   231  		if ts < filter.start || ts > filter.end {
   232  			continue
   233  		}
   234  
   235  		_, _, matches := filter.pipeline.Process(ts, line)
   236  		if matches { // When the filter matches, don't run the next step
   237  			return nil, nil, false
   238  		}
   239  	}
   240  
   241  	return sp.pipeline.Process(ts, line)
   242  }
   243  
   244  func (sp *filteringStreamPipeline) ProcessString(ts int64, line string) (string, LabelsResult, bool) {
   245  	for _, filter := range sp.filters {
   246  		if ts < filter.start || ts > filter.end {
   247  			continue
   248  		}
   249  
   250  		_, _, matches := filter.pipeline.ProcessString(ts, line)
   251  		if matches { // When the filter matches, don't run the next step
   252  			return "", nil, false
   253  		}
   254  	}
   255  
   256  	return sp.pipeline.ProcessString(ts, line)
   257  }
   258  
   259  // ReduceStages reduces multiple stages into one.
   260  func ReduceStages(stages []Stage) Stage {
   261  	if len(stages) == 0 {
   262  		return NoopStage
   263  	}
   264  	var requiredLabelNames []string
   265  	for _, s := range stages {
   266  		requiredLabelNames = append(requiredLabelNames, s.RequiredLabelNames()...)
   267  	}
   268  	return StageFunc{
   269  		process: func(ts int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
   270  			var ok bool
   271  			for _, p := range stages {
   272  				line, ok = p.Process(ts, line, lbs)
   273  				if !ok {
   274  					return nil, false
   275  				}
   276  			}
   277  			return line, true
   278  		},
   279  		requiredLabels: requiredLabelNames,
   280  	}
   281  }
   282  
   283  func unsafeGetBytes(s string) []byte {
   284  	var buf []byte
   285  	p := unsafe.Pointer(&buf)
   286  	*(*string)(p) = s
   287  	(*reflect.SliceHeader)(p).Cap = len(s)
   288  	return buf
   289  }
   290  
   291  func unsafeGetString(buf []byte) string {
   292  	return *((*string)(unsafe.Pointer(&buf)))
   293  }