github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/structs/searchnodestructs.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package structs
    18  
    19  import (
    20  	"bytes"
    21  
    22  	"github.com/siglens/siglens/pkg/config"
    23  	"github.com/siglens/siglens/pkg/segment/utils"
    24  	. "github.com/siglens/siglens/pkg/segment/utils"
    25  	log "github.com/sirupsen/logrus"
    26  )
    27  
    28  /*
    29  *******************************************
    30  *******************************************
    31  *******************************************
    32  **************** NOTE *********************
    33  any time you add a new element in the searchnode structs or in their sub structs,
    34  make sure to adjust the snhasher.go code to update the hashids, else PQS will
    35  incorrectly compute the hash id
    36  
    37  Also DO NOT change the order of the struct fields, if you do then you have to
    38  adjust the order in snhasher.go as well, however in that case the first time
    39  when the new code will run, it will create new pqid values for existing queries
    40  
    41  *******************************************
    42  *******************************************
    43  *******************************************
    44  */
// SearchNodeType classifies what a search node or query has to read in order to
// be evaluated. It is stored as a uint8.
type SearchNodeType uint8
    46  
// Possible SearchNodeType values. They are assigned through iota, so the
// declaration order fixes their numeric values.
const (
	MatchAllQuery    SearchNodeType = iota // query only needs to know a record's time range, no raw values
	ColumnValueQuery                       // query needs to know >0 non-timestamp column values
	InvalidQuery                           // an invalid query (e.g. invalid column name)
)
    52  
// SearchQuery is a single query that is either an expression filter or a match
// filter. Both are never defined at the same time; the methods on this type
// pick whichever of the two is non-nil.
type SearchQuery struct {
	ExpressionFilter *SearchExpression
	MatchFilter      *MatchFilter
	SearchType       SearchQueryType // type of query
	QueryInfo        *QueryInfo      // filled in by GetQueryInfo
}
    61  
// QueryInfo carries the column name and the values extracted from a query's
// filter. It is built by the GetQueryInfo methods of MatchFilter and
// SearchExpression.
type QueryInfo struct {
	ColName string
	KValDte []byte          // only non-nil for `MatchDictArray` requests
	QValDte *DtypeEnclosure // column value to use for raw check. May be nil if query is match filter
}
    67  
// SearchCondition is a group of leaf queries (SearchQueries) together with
// nested search nodes (SearchNode). How the group is combined depends on which
// SearchNode field (And / Or / Exclusion) holds it.
type SearchCondition struct {
	SearchQueries []*SearchQuery
	SearchNode    []*SearchNode
}
    72  
// SearchNode is one node of the search tree. Each of the three conditions is
// optional (nil when absent). NodeType is written by AddQueryInfoForNode.
type SearchNode struct {
	AndSearchConditions       *SearchCondition
	OrSearchConditions        *SearchCondition
	ExclusionSearchConditions *SearchCondition
	NodeType                  SearchNodeType // type of search request
}
    79  
    80  func (q *SearchQuery) IsMatchAll() bool {
    81  	if q.ExpressionFilter != nil {
    82  		return q.ExpressionFilter.IsMatchAll()
    83  	} else {
    84  		return q.MatchFilter.IsMatchAll()
    85  	}
    86  }
    87  
    88  func (r *SearchNode) AddQueryInfoForNode() SearchNodeType {
    89  	nType := MatchAllQuery
    90  	if r.AndSearchConditions != nil {
    91  		currType := r.AndSearchConditions.AddQueryInfo()
    92  		if currType != MatchAllQuery {
    93  			nType = ColumnValueQuery
    94  		}
    95  	}
    96  	if r.OrSearchConditions != nil {
    97  		currType := r.OrSearchConditions.AddQueryInfo()
    98  		if currType != MatchAllQuery {
    99  			nType = ColumnValueQuery
   100  		}
   101  	}
   102  	if r.ExclusionSearchConditions != nil {
   103  		currType := r.ExclusionSearchConditions.AddQueryInfo()
   104  		if currType != MatchAllQuery {
   105  			nType = ColumnValueQuery
   106  		}
   107  	}
   108  	r.NodeType = nType
   109  	return nType
   110  }
   111  
   112  func (c *SearchCondition) AddQueryInfo() SearchNodeType {
   113  	nType := MatchAllQuery
   114  	if c.SearchNode != nil {
   115  		for _, sQuery := range c.SearchNode {
   116  			currType := sQuery.AddQueryInfoForNode()
   117  			if currType != MatchAllQuery {
   118  				nType = ColumnValueQuery
   119  			}
   120  		}
   121  	}
   122  	if c.SearchQueries != nil {
   123  		for _, sQuery := range c.SearchQueries {
   124  			currType := sQuery.GetQueryInfo()
   125  			if currType != MatchAllQuery {
   126  				nType = ColumnValueQuery
   127  			}
   128  		}
   129  	}
   130  	return nType
   131  }
   132  
   133  func (n *SearchQuery) GetQueryInfo() SearchNodeType {
   134  	var queryInfo *QueryInfo
   135  	if n.MatchFilter != nil {
   136  		queryInfo = n.MatchFilter.GetQueryInfo()
   137  	} else {
   138  		queryInfo = n.ExpressionFilter.GetQueryInfo()
   139  	}
   140  	n.QueryInfo = queryInfo
   141  	return n.GetQueryType()
   142  }
   143  
   144  func (q *SearchQuery) GetQueryType() SearchNodeType {
   145  	if q.ExpressionFilter != nil {
   146  		if !q.ExpressionFilter.IsTimeRangeFilter() {
   147  			return ColumnValueQuery
   148  		} else {
   149  			return MatchAllQuery
   150  		}
   151  	} else {
   152  		if q.MatchFilter.MatchColumn == "*" || q.MatchFilter.MatchColumn == config.GetTimeStampKey() {
   153  			for _, matchWord := range q.MatchFilter.MatchWords {
   154  				if bytes.Equal(matchWord, utils.STAR_BYTE) {
   155  					if q.MatchFilter.MatchOperator == Or {
   156  						return MatchAllQuery
   157  					} else if q.MatchFilter.MatchOperator == And && len(q.MatchFilter.MatchWords) > 1 {
   158  						return ColumnValueQuery
   159  					} else {
   160  						return MatchAllQuery
   161  					}
   162  				}
   163  			}
   164  		}
   165  		return ColumnValueQuery
   166  	}
   167  }
   168  
   169  func (m *MatchFilter) GetQueryInfo() *QueryInfo {
   170  	var kValDte []byte
   171  	var colName string
   172  	var qValDte *DtypeEnclosure
   173  
   174  	if m.MatchType == MATCH_DICT_ARRAY {
   175  		colName = m.MatchColumn
   176  		kValDte = m.MatchDictArray.MatchKey
   177  		qValDte = m.MatchDictArray.MatchValue
   178  	} else {
   179  		colName = m.MatchColumn
   180  	}
   181  	if qValDte != nil {
   182  		qValDte.AddStringAsByteSlice()
   183  	}
   184  	queryInfo := &QueryInfo{
   185  		ColName: colName,
   186  		KValDte: kValDte,
   187  		QValDte: qValDte,
   188  	}
   189  	return queryInfo
   190  }
   191  
   192  func (se *SearchExpression) GetQueryInfo() *QueryInfo {
   193  
   194  	var qColName string
   195  	var qValDte *DtypeEnclosure
   196  	if len(se.LeftSearchInput.ColumnName) > 0 {
   197  		qColName = se.LeftSearchInput.ColumnName
   198  		if se.RightSearchInput.ColumnValue != nil {
   199  			qValDte = se.RightSearchInput.ColumnValue
   200  		}
   201  	} else {
   202  		qColName = se.RightSearchInput.ColumnName
   203  		qValDte = se.LeftSearchInput.ColumnValue
   204  	}
   205  
   206  	if qValDte != nil {
   207  		qValDte.AddStringAsByteSlice()
   208  	}
   209  
   210  	qInfo := &QueryInfo{
   211  		ColName: qColName,
   212  		QValDte: qValDte,
   213  	}
   214  
   215  	return qInfo
   216  }
   217  
   218  // extract all columns from SearchQuery
   219  // returns a map[string]bool, where key is the column name
   220  // returns a bool that indicates whether a full wildcard is present (only "*")
   221  func (query *SearchQuery) GetAllColumnsInQuery() (map[string]bool, bool) {
   222  	if query.MatchFilter != nil {
   223  		result := make(map[string]bool)
   224  		if query.MatchFilter.MatchColumn == "*" {
   225  			return result, true
   226  		}
   227  		result[query.MatchFilter.MatchColumn] = true
   228  		return result, false
   229  	}
   230  
   231  	allExpressionCols := query.ExpressionFilter.getAllColumnsInSearch()
   232  	result := make(map[string]bool)
   233  	for col := range allExpressionCols {
   234  		if col == "*" {
   235  			return result, true
   236  		}
   237  		result[col] = true
   238  	}
   239  	return result, false
   240  }
   241  
   242  func (node *SearchNode) GetAllColumnsToSearch() (map[string]bool, bool) {
   243  
   244  	timestampCol := config.GetTimeStampKey()
   245  	allConditions, wildcard := GetAllColumnsFromNode(node)
   246  	allColumns := make(map[string]bool)
   247  	for colStr := range allConditions {
   248  		if colStr != timestampCol {
   249  			allColumns[colStr] = true
   250  		}
   251  	}
   252  	return allColumns, wildcard
   253  }
   254  
   255  func GetAllColumnsFromNode(node *SearchNode) (map[string]bool, bool) {
   256  	andCond, andWildcard := GetAllColumnsFromCondition(node.AndSearchConditions)
   257  	orCond, orWildcard := GetAllColumnsFromCondition(node.OrSearchConditions)
   258  
   259  	// don't add exclusion columns as they don't need to exist in the raw log line
   260  	// If exclusion condition exists, then treat it as a wildcard to get all entries to check exclusion conditions on
   261  	// TODO: optimize exclusion criteria
   262  	var exclusionWildcard bool
   263  	if node.ExclusionSearchConditions == nil {
   264  		exclusionWildcard = false
   265  	} else {
   266  		exclusionWildcard = true
   267  	}
   268  
   269  	for k, v := range orCond {
   270  		andCond[k] = v
   271  	}
   272  	return andCond, andWildcard || orWildcard || exclusionWildcard
   273  }
   274  
   275  // Get all columns that occur across a list of *SearchQuery
   276  // returns all columns and if any of the columns contains wildcards
   277  func GetAllColumnsFromCondition(cond *SearchCondition) (map[string]bool, bool) {
   278  	allUniqueColumns := make(map[string]bool) // first make a map to avoid duplicates
   279  
   280  	if cond == nil {
   281  		return allUniqueColumns, false
   282  	}
   283  
   284  	for _, query := range cond.SearchQueries {
   285  		currColumns, wildcard := query.GetAllColumnsInQuery()
   286  		if wildcard {
   287  			return allUniqueColumns, true
   288  		}
   289  
   290  		for k := range currColumns {
   291  			allUniqueColumns[k] = true
   292  		}
   293  	}
   294  
   295  	for _, node := range cond.SearchNode {
   296  		currColumns, wildcard := GetAllColumnsFromNode(node)
   297  		if wildcard {
   298  			return allUniqueColumns, true
   299  		}
   300  		for k, v := range currColumns {
   301  			allUniqueColumns[k] = v
   302  		}
   303  	}
   304  
   305  	allUniqueColumns[config.GetTimeStampKey()] = true
   306  
   307  	return allUniqueColumns, false
   308  }
   309  
   310  // returns map[string]bool, bool, LogicalOperator
   311  // map is all non-wildcard block bloom keys, bool is if any keyword contained a wildcard, LogicalOperator
   312  // is if any/all of map keys need to exist
   313  func (query *SearchQuery) GetAllBlockBloomKeysToSearch() (map[string]bool, bool, LogicalOperator) {
   314  
   315  	if query.MatchFilter != nil {
   316  		matchKeys, wildcardExists, matchOp := query.MatchFilter.GetAllBlockBloomKeysToSearch()
   317  		return matchKeys, wildcardExists, matchOp
   318  	} else {
   319  		blockBloomKeys, wildcardExists, err := query.ExpressionFilter.GetAllBlockBloomKeysToSearch()
   320  		if err != nil {
   321  			return make(map[string]bool), false, And
   322  		}
   323  		return blockBloomKeys, wildcardExists, And
   324  	}
   325  }
   326  
   327  func (query *SearchQuery) ExtractRangeFilterFromQuery(qid uint64) (map[string]string, FilterOperator, bool) {
   328  
   329  	if query.MatchFilter != nil {
   330  		return nil, Equals, false
   331  	}
   332  	return ExtractRangeFilterFromSearch(query.ExpressionFilter.LeftSearchInput,
   333  		query.ExpressionFilter.FilterOp, query.ExpressionFilter.RightSearchInput, qid)
   334  }
   335  
   336  // Given a left and right SearchInputs with filterOp, extract out range filters
   337  // Returns a map from column to value, the final operator, and a bool telling if a range filter has been found for these. False value means the search inputs have no range filters
   338  // (may have swapped from original if right has column and left and literal)
   339  func ExtractRangeFilterFromSearch(leftSearch *SearchExpressionInput, filterOp FilterOperator, rightSearch *SearchExpressionInput, qid uint64) (map[string]string, FilterOperator, bool) {
   340  
   341  	if filterOp == IsNull || filterOp == IsNotNull {
   342  		return nil, filterOp, false
   343  	}
   344  	rangeFilter := make(map[string]string)
   345  	var finalOp FilterOperator
   346  	if len(leftSearch.ColumnName) > 0 && rightSearch.ColumnValue != nil {
   347  		if !rightSearch.ColumnValue.IsNumeric() {
   348  			return nil, filterOp, false
   349  		}
   350  
   351  		// TODO: byte column name comparison
   352  		rangeFilter[leftSearch.ColumnName] = rightSearch.ColumnValue.StringVal
   353  		finalOp = filterOp
   354  
   355  		return rangeFilter, finalOp, true
   356  	} else if len(rightSearch.ColumnName) > 0 && leftSearch.ColumnValue != nil {
   357  		if !leftSearch.ColumnValue.IsNumeric() {
   358  			return rangeFilter, filterOp, false
   359  		}
   360  
   361  		// TODO: byte column name comparison
   362  		rangeFilter[rightSearch.ColumnName] = leftSearch.ColumnValue.StringVal
   363  		reflectedOp := ReflectFilterOperator[filterOp]
   364  		finalOp = reflectedOp
   365  
   366  		return rangeFilter, finalOp, true
   367  	} else {
   368  		// TODO: simply complex relations for range filters -> col1 * 2 > 5 --> col1 > 2.5
   369  		log.Warningf("qid=%d, Unable to extract range filter from %+v, and %+v", qid, leftSearch, rightSearch)
   370  	}
   371  
   372  	return rangeFilter, filterOp, false
   373  }
   374  
   375  func EditQueryTypeForInvalidColumn(originalType SearchNodeType) SearchNodeType {
   376  
   377  	// we were unable to extract columns we needed
   378  	if originalType != MatchAllQuery {
   379  		return InvalidQuery
   380  	}
   381  	return originalType
   382  }