github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/structs/querystructs.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package structs
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"regexp"
    23  	"strings"
    24  
    25  	"strconv"
    26  
    27  	dtu "github.com/siglens/siglens/pkg/common/dtypeutils"
    28  	"github.com/siglens/siglens/pkg/config"
    29  	"github.com/siglens/siglens/pkg/segment/pqmr"
    30  	"github.com/siglens/siglens/pkg/segment/utils"
    31  	vtable "github.com/siglens/siglens/pkg/virtualtable"
    32  )
    33  
// QueryContext bundles the parameters that are passed along with a query.
type QueryContext struct {
	TableInfo *TableInfo // raw index request and the tables it expanded to; may be nil (GetNumTables handles that case)
	SizeLimit uint64     // presumably the maximum number of results to return — confirm with callers
	Scroll    int        // presumably a scroll/pagination position — confirm with callers
	Orgid     uint64     // organization ID; InitQueryContext also passes it on when expanding index names
}
    41  
// FilterInput is one input to a filter operator. It is either the result of an
// ASTNode subtree or an expression: exactly one of SubTree and Expression is
// expected to be set, never both and never neither.
type FilterInput struct {
	SubTree       *ASTNode    // root of the ASTNode subtree
	SubtreeResult string      // result of processing the subtree
	Expression    *Expression // expression used as the filter input
}
    49  
// NodeAggregation pairs an aggregate function with the columns it is applied to.
type NodeAggregation struct {
	AggregationFunctions utils.AggregateFunctions // function to apply on results of children (e.g. min, max)
	AggregationColumns   []string                 // column names to aggregate on (e.g. which column to average over)
}
    54  
// MatchFilterType identifies which kind of match a MatchFilter performs.
type MatchFilterType uint8

// The named match types start at 1 (iota + 1), so the zero value of
// MatchFilterType is not equal to any of them.
const (
	// MATCH_WORDS presumably selects matching on MatchFilter.MatchWords — confirm with the search code.
	MATCH_WORDS MatchFilterType = iota + 1
	// MATCH_PHRASE is the match-phrase query type; MatchFilter.MatchPhrase holds the whole string to search for.
	MATCH_PHRASE
	// MATCH_DICT_ARRAY presumably selects matching on MatchFilter.MatchDictArray — confirm with the search code.
	MATCH_DICT_ARRAY
)
    62  
// MatchFilter searches for all words in MatchWords in the column MatchColumn.
// MatchOperator defines whether all or any of the MatchWords need to be present.
type MatchFilter struct {
	MatchColumn    string                 // column to search in
	MatchWords     [][]byte               // all words to search for
	MatchOperator  utils.LogicalOperator  // how to combine MatchWords
	MatchPhrase    []byte                 // whole string to search for in case of a match-phrase query
	MatchDictArray *MatchDictArrayRequest // array to search for in case of a jaeger query
	MatchType      MatchFilterType        // kind of match; IsMatchAll treats MATCH_PHRASE as never matching everything
	NegateMatch    bool                   // presumably inverts the match result — confirm with the search code
	Regexp         *regexp.Regexp         // compiled regular expression; how it is applied is not visible in this file
}
    75  
// MatchDictArrayRequest is the key/value pair carried by MatchFilter.MatchDictArray.
type MatchDictArrayRequest struct {
	MatchKey   []byte                // key to match — exact lookup semantics are not visible here; confirm with the search code
	MatchValue *utils.DtypeEnclosure // value to match for MatchKey
}
    80  
// ExpressionFilter denotes a single expression to search for in a log record,
// made of a left input, a filter operator, and a right input.
type ExpressionFilter struct {
	LeftInput      *FilterInput         // left input to FilterOperator
	FilterOperator utils.FilterOperator // how the inputs are compared (e.g. logField=filterString, logField >= filterValue)
	RightInput     *FilterInput         // right input to FilterOperator
}
    87  
// FilterCriteria is a top-level filter condition that holds either a
// MatchFilter or an ExpressionFilter. Only one of them is expected to be set,
// never both.
type FilterCriteria struct {
	MatchFilter      *MatchFilter      // match filter to check multiple words in a column
	ExpressionFilter *ExpressionFilter // expression filter to check a single expression in a column
}
    93  
// Condition groups the filter criteria and the nested ASTNodes that together
// form a single condition.
type Condition struct {
	FilterCriteria []*FilterCriteria // raw conditions to check
	NestedNodes    []*ASTNode        // nested conditions to check
}
    99  
// ASTNode is a node used to query data in a segment file.
// A line matches a node if it satisfies AndFilterCondition and
// OrFilterCondition, and does not satisfy ExclusionFilterCondition.
type ASTNode struct {
	AndFilterCondition       *Condition     // A condition to query. Condition must return true for log line to pass
	OrFilterCondition        *Condition     // Condition must return true for log line to pass
	ExclusionFilterCondition *Condition     // Condition must return false for log line to pass
	TimeRange                *dtu.TimeRange // Time range for node micro index / raw search
	ActiveFileSearch         bool           // Lookup unrotated segfiles
	BucketLimit              int            // presumably a cap on the number of buckets — confirm with callers
}
   110  
// TableInfo is a helper struct that keeps track of the raw index request and
// the tables it expanded to. It is populated by InitTableInfo.
type TableInfo struct {
	rawRequest   string   // index request exactly as passed to InitTableInfo
	queryTables  []string // expanded index names to query; for es requests, only the names without ".kibana"
	kibanaTables []string // expanded index names containing ".kibana"; only set for es requests
	numIndices   int      // total number of expanded index names, kibana ones included
}
   118  
// BlockTracker is a helper struct that keeps track of which blocks to check.
type BlockTracker struct {
	entireFile    bool            // when true, ShouldProcessBlock accepts every block
	excludeBlocks map[uint16]bool // block numbers to skip; ShouldProcessBlock only checks key presence
}
   124  
   125  func InitTableInfo(rawRequest string, orgid uint64, es bool) *TableInfo {
   126  	indexNamesRetrieved := vtable.ExpandAndReturnIndexNames(rawRequest, orgid, es)
   127  	ti := &TableInfo{rawRequest: rawRequest}
   128  	if es {
   129  		nonKibana, kibana := filterKibanaIndices(indexNamesRetrieved)
   130  		ti.kibanaTables = kibana
   131  		ti.queryTables = nonKibana
   132  	} else {
   133  		ti.queryTables = indexNamesRetrieved
   134  	}
   135  	ti.numIndices = len(indexNamesRetrieved)
   136  	return ti
   137  }
   138  
   139  func (ti *TableInfo) GetRawRequest() string {
   140  	return ti.rawRequest
   141  }
   142  
   143  // returns nonKibanaIndices, kibanaIndices
   144  func filterKibanaIndices(indexNames []string) ([]string, []string) {
   145  	kibanaIndices := make([]string, 0)
   146  	nonKibanaIndices := make([]string, 0)
   147  	for _, iName := range indexNames {
   148  		if strings.Contains(iName, ".kibana") {
   149  			kibanaIndices = append(kibanaIndices, iName)
   150  		} else {
   151  			nonKibanaIndices = append(nonKibanaIndices, iName)
   152  		}
   153  	}
   154  	return nonKibanaIndices, kibanaIndices
   155  }
   156  
   157  func (ti *TableInfo) String() string {
   158  	var buffer bytes.Buffer
   159  	buffer.WriteString("Raw Index: [")
   160  	buffer.WriteString(ti.rawRequest)
   161  	buffer.WriteString("] Expanded To ")
   162  	buffer.WriteString(strconv.FormatInt(int64(len(ti.queryTables)), 10))
   163  	buffer.WriteString(" Entries. There are: ")
   164  	buffer.WriteString(strconv.FormatInt(int64(len(ti.kibanaTables)), 10))
   165  	buffer.WriteString(" Elastic Indices. Sample: ")
   166  	buffer.WriteString(getIndexNamesCleanLogs(ti.queryTables))
   167  	return buffer.String()
   168  }
   169  
   170  func (ti *TableInfo) GetQueryTables() []string {
   171  	if ti == nil {
   172  		return make([]string, 0)
   173  	}
   174  	return ti.queryTables
   175  }
   176  
   177  func (ti *TableInfo) GetKibanaIndices() []string {
   178  	if ti == nil {
   179  		return make([]string, 0)
   180  	}
   181  	return ti.kibanaTables
   182  }
   183  
   184  func (ti *TableInfo) GetNumIndices() int {
   185  	if ti == nil {
   186  		return 0
   187  	}
   188  	return ti.numIndices
   189  }
   190  
   191  // gets the number of tables that will be queried
   192  func (qc *QueryContext) GetNumTables() int {
   193  	if qc.TableInfo == nil {
   194  		return 0
   195  	}
   196  	return qc.TableInfo.GetNumIndices()
   197  }
   198  
   199  func getIndexNamesCleanLogs(indices []string) string {
   200  	var indicesStr string
   201  	if len(indices) > 4 {
   202  		indicesStr = fmt.Sprintf("%v%s", indices[:4], ".....")
   203  	} else {
   204  		indicesStr = fmt.Sprintf("%v", indices)
   205  	}
   206  	return indicesStr
   207  }
   208  
   209  func InitQueryContext(indexRequest string, sizeLimit uint64, scroll int, orgid uint64, es bool) *QueryContext {
   210  	ti := InitTableInfo(indexRequest, orgid, es)
   211  	return &QueryContext{
   212  		TableInfo: ti,
   213  		SizeLimit: sizeLimit,
   214  		Scroll:    scroll,
   215  		Orgid:     orgid,
   216  	}
   217  }
   218  
   219  func InitQueryContextWithTableInfo(ti *TableInfo, sizeLimit uint64, scroll int, orgid uint64, es bool) *QueryContext {
   220  	return &QueryContext{
   221  		TableInfo: ti,
   222  		SizeLimit: sizeLimit,
   223  		Scroll:    scroll,
   224  		Orgid:     orgid,
   225  	}
   226  }
   227  
   228  func InitEntireFileBlockTracker() *BlockTracker {
   229  	return &BlockTracker{entireFile: true}
   230  }
   231  
   232  func InitExclusionBlockTracker(spqmr *pqmr.SegmentPQMRResults) *BlockTracker {
   233  	exclude := make(map[uint16]bool)
   234  	for _, blkNum := range spqmr.GetAllBlocks() {
   235  		exclude[blkNum] = true
   236  	}
   237  	return &BlockTracker{entireFile: false, excludeBlocks: exclude}
   238  }
   239  
   240  func (bt *BlockTracker) ShouldProcessBlock(blkNum uint16) bool {
   241  	if bt.entireFile {
   242  		return true
   243  	}
   244  	_, ok := bt.excludeBlocks[blkNum]
   245  	if !ok {
   246  		return true
   247  	} else {
   248  		return false
   249  	}
   250  }
   251  
   252  func (c *Condition) JoinCondition(add *Condition) {
   253  	if add == nil {
   254  		return
   255  	}
   256  
   257  	if add.FilterCriteria != nil && len(add.FilterCriteria) > 0 {
   258  		if c.FilterCriteria == nil {
   259  			c.FilterCriteria = add.FilterCriteria
   260  		} else {
   261  			c.FilterCriteria = append(c.FilterCriteria, add.FilterCriteria...)
   262  		}
   263  	}
   264  
   265  	if add.NestedNodes != nil && len(add.NestedNodes) > 0 {
   266  		if c.NestedNodes == nil {
   267  			c.NestedNodes = add.NestedNodes
   268  		} else {
   269  			c.NestedNodes = append(c.NestedNodes, add.NestedNodes...)
   270  		}
   271  	}
   272  }
   273  
   274  func (f *FilterCriteria) IsTimeRange() bool {
   275  	if f.MatchFilter != nil {
   276  		if f.MatchFilter.MatchColumn == "*" {
   277  			return true
   278  		}
   279  		return f.MatchFilter.MatchColumn == config.GetTimeStampKey()
   280  	} else {
   281  		return f.ExpressionFilter.IsTimeRange()
   282  	}
   283  }
   284  
   285  func (e *ExpressionFilter) IsTimeRange() bool {
   286  	if e.LeftInput != nil && e.LeftInput.Expression != nil {
   287  		if !e.LeftInput.Expression.IsTimeExpression() {
   288  			return false
   289  		}
   290  	}
   291  	if e.RightInput != nil && e.RightInput.Expression != nil {
   292  		if !e.RightInput.Expression.IsTimeExpression() {
   293  			return false
   294  		}
   295  	}
   296  	return true
   297  }
   298  
   299  func (e *ExpressionFilter) GetAllColumns() map[string]bool {
   300  	allCols := make(map[string]bool)
   301  	if e.LeftInput != nil && e.LeftInput.Expression != nil {
   302  		if e.LeftInput.Expression.RightInput != nil && len(e.LeftInput.Expression.RightInput.ColumnName) > 0 {
   303  			allCols[e.LeftInput.Expression.RightInput.ColumnName] = true
   304  
   305  		}
   306  		if e.LeftInput.Expression.LeftInput != nil && len(e.LeftInput.Expression.LeftInput.ColumnName) > 0 {
   307  			allCols[e.LeftInput.Expression.LeftInput.ColumnName] = true
   308  		}
   309  	}
   310  	if e.RightInput != nil && e.RightInput.Expression != nil {
   311  		if e.RightInput.Expression.RightInput != nil && len(e.RightInput.Expression.RightInput.ColumnName) > 0 {
   312  			allCols[e.RightInput.Expression.RightInput.ColumnName] = true
   313  		}
   314  		if e.RightInput.Expression.LeftInput != nil && len(e.RightInput.Expression.LeftInput.ColumnName) > 0 {
   315  			allCols[e.RightInput.Expression.LeftInput.ColumnName] = true
   316  		}
   317  	}
   318  	return allCols
   319  }
   320  
   321  func (f *FilterCriteria) GetAllColumns() map[string]bool {
   322  	if f.MatchFilter != nil {
   323  		allCols := make(map[string]bool)
   324  		allCols[f.MatchFilter.MatchColumn] = true
   325  		return allCols
   326  	}
   327  
   328  	return f.ExpressionFilter.GetAllColumns()
   329  }
   330  
   331  // we expect a matchColumn == * AND matchWords == *
   332  func (mf *MatchFilter) IsMatchAll() bool {
   333  	if mf.MatchType == MATCH_PHRASE {
   334  		return false
   335  	}
   336  
   337  	if mf.MatchColumn != "*" {
   338  		return false
   339  	}
   340  
   341  	if len(mf.MatchWords) != 1 {
   342  		return false
   343  	}
   344  	if bytes.Equal(mf.MatchWords[0], utils.STAR_BYTE) {
   345  		return true
   346  	}
   347  	return false
   348  }