github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/rawchecker.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package writer
    18  
    19  import (
    20  	"bytes"
    21  	"errors"
    22  	"fmt"
    23  
    24  	. "github.com/siglens/siglens/pkg/segment/structs"
    25  	. "github.com/siglens/siglens/pkg/segment/utils"
    26  
    27  	dtu "github.com/siglens/siglens/pkg/common/dtypeutils"
    28  	"github.com/siglens/siglens/pkg/utils"
    29  
    30  	log "github.com/sirupsen/logrus"
    31  )
    32  
    33  func ApplySearchToMatchFilterRawCsg(match *MatchFilter, col []byte) (bool, error) {
    34  
    35  	if len(match.MatchWords) == 0 {
    36  		return true, nil
    37  	}
    38  
    39  	if len(col) == 0 {
    40  		return false, errors.New("column does not exist")
    41  	}
    42  
    43  	if col[0] != VALTYPE_ENC_SMALL_STRING[0] {
    44  		return false, nil
    45  	}
    46  
    47  	idx := uint16(1) // for encoding type
    48  	// next 2 bytes tell us the len of column
    49  	clen := utils.BytesToUint16LittleEndian(col[idx : idx+COL_OFF_BYTE_SIZE])
    50  	idx += COL_OFF_BYTE_SIZE
    51  
    52  	// todo MatchWords struct can store bytes
    53  	if match.MatchOperator == And {
    54  		var foundQword bool = true
    55  		if match.MatchType == MATCH_PHRASE {
    56  			if match.Regexp != nil {
    57  				foundQword = match.Regexp.Match(col[idx : idx+clen])
    58  			} else {
    59  				foundQword = utils.IsSubWordPresent(col[idx:idx+clen], match.MatchPhrase)
    60  			}
    61  		} else {
    62  			for _, qword := range match.MatchWords {
    63  				foundQword = utils.IsSubWordPresent(col[idx:idx+clen], []byte(qword))
    64  				if !foundQword {
    65  					break
    66  				}
    67  			}
    68  		}
    69  		return foundQword, nil
    70  	}
    71  
    72  	if match.MatchOperator == Or {
    73  		var foundQword bool
    74  		for _, qword := range match.MatchWords {
    75  			foundQword = utils.IsSubWordPresent(col[idx:idx+clen], []byte(qword))
    76  			if foundQword {
    77  				return true, nil
    78  			}
    79  		}
    80  		return false, nil
    81  	}
    82  
    83  	return false, nil
    84  }
    85  
    86  func ApplySearchToDictArrayFilter(col []byte, qValDte *DtypeEnclosure, rec []byte, fop FilterOperator, isRegexSearch bool,
    87  	holderDte *DtypeEnclosure) (bool, error) {
    88  	if qValDte == nil {
    89  		return false, nil
    90  	}
    91  
    92  	if len(rec) == 0 || rec[0] != VALTYPE_DICT_ARRAY[0] {
    93  		return false, nil
    94  	} else if rec[0] == VALTYPE_DICT_ARRAY[0] {
    95  		//loop over the dict arrray till we reach the end
    96  		totalLen := utils.BytesToInt16LittleEndian(rec[1:])
    97  		idx := uint16(3)
    98  		var keyEquals, valEquals bool
    99  		var err error
   100  		for idx < uint16(totalLen) {
   101  			strlen := utils.BytesToUint16LittleEndian(rec[idx : idx+2])
   102  			idx += 2
   103  			if int(strlen) == len(col) {
   104  				keyEquals = bytes.Equal(rec[idx:idx+strlen], col)
   105  			}
   106  			idx += strlen
   107  			if !keyEquals {
   108  				switch rec[idx] {
   109  				case VALTYPE_ENC_SMALL_STRING[0]:
   110  					// one byte for type & two for reclen
   111  					strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   112  					idx += 3 + strlen
   113  				case VALTYPE_ENC_BOOL[0]:
   114  					strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   115  					idx += 3 + strlen
   116  				case VALTYPE_ENC_INT64[0], VALTYPE_ENC_FLOAT64[0]:
   117  					strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   118  					idx += 3 + strlen
   119  				default:
   120  					log.Errorf("ApplySearchToDictArrayFilter:SS_DT_ARRAY_DICT unknown type=%v\n", rec[idx])
   121  					return false, errors.New("invalid rec type")
   122  				}
   123  				continue
   124  			}
   125  			switch rec[idx] {
   126  			case VALTYPE_ENC_SMALL_STRING[0]:
   127  				// one byte for type & two for reclen
   128  				strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   129  				idx += 3
   130  				valEquals = bytes.Equal(rec[idx:idx+strlen], qValDte.StringValBytes)
   131  				idx += strlen
   132  			case VALTYPE_ENC_BOOL[0]:
   133  				// valEquals, err = fopOnBool(rec[idx:], qValDte, fop)
   134  				strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   135  				idx += 3
   136  				valEquals = bytes.Equal(rec[idx:idx+strlen], qValDte.StringValBytes)
   137  				idx += strlen
   138  			case VALTYPE_ENC_INT64[0]:
   139  				strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   140  				idx += 3
   141  				valEquals = bytes.Equal(rec[idx:idx+strlen], qValDte.StringValBytes)
   142  				idx += strlen
   143  			case VALTYPE_ENC_FLOAT64[0]:
   144  				strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   145  				idx += 3
   146  				valEquals = bytes.Equal(rec[idx:idx+strlen], qValDte.StringValBytes)
   147  				idx += strlen
   148  			default:
   149  				log.Errorf("ApplySearchToDictArrayFilter:SS_DT_ARRAY_DICT unknown type=%v\n", rec[idx])
   150  				return false, errors.New("invalid rec type")
   151  			}
   152  			if keyEquals && valEquals {
   153  				return true, nil
   154  			}
   155  		}
   156  		return keyEquals && valEquals, err
   157  	}
   158  	return false, nil
   159  }
   160  
   161  func ApplySearchToExpressionFilterSimpleCsg(qValDte *DtypeEnclosure, fop FilterOperator,
   162  	col []byte, isRegexSearch bool, holderDte *DtypeEnclosure) (bool, error) {
   163  
   164  	holderDte.Reset()
   165  
   166  	return filterOpOnDataType(col, qValDte, fop, isRegexSearch, holderDte)
   167  }
   168  
   169  func isValTypeEncANumber(valTypeEnc byte) bool {
   170  	switch valTypeEnc {
   171  	case VALTYPE_ENC_INT8[0], VALTYPE_ENC_INT16[0], VALTYPE_ENC_INT32[0], VALTYPE_ENC_INT64[0],
   172  		VALTYPE_ENC_UINT8[0], VALTYPE_ENC_UINT16[0], VALTYPE_ENC_UINT32[0], VALTYPE_ENC_UINT64[0],
   173  		VALTYPE_ENC_FLOAT64[0]:
   174  		return true
   175  	}
   176  	return false
   177  }
   178  
   179  func filterOpOnDataType(rec []byte, qValDte *DtypeEnclosure, fop FilterOperator,
   180  	isRegexSearch bool, recDte *DtypeEnclosure) (bool, error) {
   181  
   182  	if qValDte == nil {
   183  		return false, nil
   184  	}
   185  	switch qValDte.Dtype {
   186  	case SS_DT_STRING:
   187  		if len(rec) == 0 || rec[0] != VALTYPE_ENC_SMALL_STRING[0] {
   188  			// if we are doing a regex search on a number, we need to convert the number to string
   189  			if isRegexSearch && isValTypeEncANumber(rec[0]) {
   190  				return filterOpOnRecNumberEncType(rec, qValDte, fop, isRegexSearch, recDte)
   191  			}
   192  			return false, nil
   193  		}
   194  		return fopOnString(rec, qValDte, fop, isRegexSearch)
   195  	case SS_DT_BOOL:
   196  		if len(rec) == 0 || rec[0] != VALTYPE_ENC_BOOL[0] {
   197  			return false, nil
   198  		}
   199  		return fopOnBool(rec, qValDte, fop)
   200  	case SS_DT_SIGNED_NUM, SS_DT_UNSIGNED_NUM, SS_DT_FLOAT:
   201  		return fopOnNumber(rec, qValDte, recDte, fop)
   202  	case SS_DT_BACKFILL:
   203  		return false, nil
   204  	default:
   205  		return false, errors.New("filterOpOnDataType:could not complete op")
   206  	}
   207  }
   208  
   209  func filterOpOnRecNumberEncType(rec []byte, qValDte *DtypeEnclosure, fop FilterOperator,
   210  	isRegexSearch bool, recDte *DtypeEnclosure) (bool, error) {
   211  
   212  	if qValDte == nil || !isRegexSearch {
   213  		return false, nil
   214  	}
   215  
   216  	validNumberType, err := getNumberRecDte(rec, recDte)
   217  	if !validNumberType {
   218  		return false, err
   219  	}
   220  
   221  	regexp := qValDte.GetRegexp()
   222  	if regexp == nil {
   223  		return false, errors.New("qValDte had nil regexp compilation")
   224  	}
   225  
   226  	var recValString string
   227  
   228  	if recDte.Dtype == SS_DT_FLOAT {
   229  		recValString = fmt.Sprintf("%f", recDte.FloatVal)
   230  	} else if recDte.Dtype == SS_DT_UNSIGNED_NUM {
   231  		recValString = fmt.Sprintf("%d", recDte.UnsignedVal)
   232  	} else if recDte.Dtype == SS_DT_SIGNED_NUM {
   233  		recValString = fmt.Sprintf("%d", recDte.SignedVal)
   234  	} else {
   235  		return false, errors.New("filterOpOnRecNumberEncType: unknown dtype")
   236  	}
   237  
   238  	if fop == Equals {
   239  		return regexp.Match([]byte(recValString)), nil
   240  	} else if fop == NotEquals {
   241  		return !regexp.Match([]byte(recValString)), nil
   242  	} else {
   243  		return false, nil
   244  	}
   245  
   246  }
   247  
   248  func fopOnString(rec []byte, qValDte *DtypeEnclosure, fop FilterOperator,
   249  	isRegexSearch bool) (bool, error) {
   250  
   251  	var sOff uint16 = 3
   252  	switch fop {
   253  	case Equals:
   254  		if isRegexSearch {
   255  			regexp := qValDte.GetRegexp()
   256  			if regexp == nil {
   257  				return false, errors.New("qValDte had nil regexp compilation")
   258  			}
   259  			return regexp.Match(rec[sOff:]), nil
   260  		}
   261  		if len(rec[sOff:]) != len(qValDte.StringVal) {
   262  			return false, nil
   263  		}
   264  		return bytes.Equal(rec[sOff:], qValDte.StringValBytes), nil
   265  	case NotEquals:
   266  		if isRegexSearch {
   267  			regexp := qValDte.GetRegexp()
   268  			if regexp == nil {
   269  				return false, errors.New("qValDte had nil regexp compilation")
   270  			}
   271  			return !regexp.Match(rec[sOff:]), nil
   272  		}
   273  		return !bytes.Equal(rec[sOff:], qValDte.StringValBytes), nil
   274  	}
   275  	return false, nil
   276  }
   277  
   278  func fopOnBool(rec []byte, qValDte *DtypeEnclosure, fop FilterOperator) (bool, error) {
   279  
   280  	switch fop {
   281  	case Equals:
   282  		return rec[1] == qValDte.BoolVal, nil
   283  	case NotEquals:
   284  		return rec[1] != qValDte.BoolVal, nil
   285  	}
   286  	return false, nil
   287  }
   288  
   289  func getNumberRecDte(rec []byte, recDte *DtypeEnclosure) (bool, error) {
   290  	if len(rec) == 0 {
   291  		return false, nil
   292  	}
   293  	// first find recDte's Dtype and typecast it
   294  	switch rec[0] {
   295  	case VALTYPE_ENC_BACKFILL[0]:
   296  		return false, nil
   297  	case VALTYPE_ENC_BOOL[0]:
   298  		return false, nil
   299  	case VALTYPE_ENC_SMALL_STRING[0]:
   300  		return false, nil
   301  	case VALTYPE_ENC_INT8[0]:
   302  		recDte.Dtype = SS_DT_SIGNED_NUM
   303  		recDte.SignedVal = int64(rec[1])
   304  	case VALTYPE_ENC_INT16[0]:
   305  		recDte.Dtype = SS_DT_SIGNED_NUM
   306  		recDte.SignedVal = int64(utils.BytesToInt16LittleEndian(rec[1:3]))
   307  	case VALTYPE_ENC_INT32[0]:
   308  		recDte.Dtype = SS_DT_SIGNED_NUM
   309  		recDte.SignedVal = int64(utils.BytesToInt32LittleEndian(rec[1:5]))
   310  	case VALTYPE_ENC_INT64[0]:
   311  		recDte.Dtype = SS_DT_SIGNED_NUM
   312  		recDte.SignedVal = utils.BytesToInt64LittleEndian(rec[1:9])
   313  	case VALTYPE_ENC_UINT8[0]:
   314  		recDte.Dtype = SS_DT_UNSIGNED_NUM
   315  		recDte.UnsignedVal = uint64(rec[1])
   316  	case VALTYPE_ENC_UINT16[0]:
   317  		recDte.Dtype = SS_DT_UNSIGNED_NUM
   318  		recDte.UnsignedVal = uint64(utils.BytesToUint16LittleEndian(rec[1:3]))
   319  	case VALTYPE_ENC_UINT32[0]:
   320  		recDte.Dtype = SS_DT_UNSIGNED_NUM
   321  		recDte.UnsignedVal = uint64(utils.BytesToUint32LittleEndian(rec[1:5]))
   322  	case VALTYPE_ENC_UINT64[0]:
   323  		recDte.Dtype = SS_DT_UNSIGNED_NUM
   324  		recDte.UnsignedVal = utils.BytesToUint64LittleEndian(rec[1:9])
   325  	case VALTYPE_ENC_FLOAT64[0]:
   326  		recDte.Dtype = SS_DT_FLOAT
   327  		recDte.FloatVal = utils.BytesToFloat64LittleEndian(rec[1:9])
   328  	case VALTYPE_DICT_ARRAY[0], VALTYPE_RAW_JSON[0]:
   329  		return false, nil
   330  	default:
   331  		log.Errorf("fopOnNumber: dont know how to convert type=%v", rec[0])
   332  		return false, errors.New("fopOnNumber: invalid rec type")
   333  	}
   334  	return true, nil
   335  }
   336  
   337  func fopOnNumber(rec []byte, qValDte *DtypeEnclosure,
   338  	recDte *DtypeEnclosure, op FilterOperator) (bool, error) {
   339  
   340  	validNumberType, err := getNumberRecDte(rec, recDte)
   341  	if !validNumberType {
   342  		return false, err
   343  	}
   344  
   345  	// now create a float (highest level for rec, only if we need to based on query
   346  	if qValDte.Dtype == SS_DT_FLOAT && recDte.Dtype != SS_DT_FLOAT {
   347  		// todo need to check err
   348  		recDte.FloatVal, _ = dtu.ConvertToFloat(recDte.UnsignedVal, 64)
   349  	}
   350  
   351  	return compareNumberDte(recDte, qValDte, op)
   352  
   353  }
   354  
   355  /*
   356  We never convert any qValDte params, caller's responsibility to store
   357  all possible values in a heierarichal order.
   358  We will only convert the recDte (stored val) to appropriate formats as needed
   359  */
   360  func compareNumberDte(recDte *DtypeEnclosure, qValDte *DtypeEnclosure, op FilterOperator) (bool, error) {
   361  
   362  	switch recDte.Dtype {
   363  	case SS_DT_FLOAT:
   364  		switch op {
   365  		case Equals:
   366  			return dtu.AlmostEquals(recDte.FloatVal, qValDte.FloatVal), nil
   367  		case NotEquals:
   368  			return !dtu.AlmostEquals(recDte.FloatVal, qValDte.FloatVal), nil
   369  		case LessThan:
   370  			return recDte.FloatVal < qValDte.FloatVal, nil
   371  		case LessThanOrEqualTo:
   372  			return recDte.FloatVal <= qValDte.FloatVal, nil
   373  		case GreaterThan:
   374  			return recDte.FloatVal > qValDte.FloatVal, nil
   375  		case GreaterThanOrEqualTo:
   376  			return recDte.FloatVal >= qValDte.FloatVal, nil
   377  		}
   378  	case SS_DT_UNSIGNED_NUM:
   379  		switch op {
   380  		case Equals:
   381  			return recDte.UnsignedVal == qValDte.UnsignedVal, nil
   382  		case NotEquals:
   383  			return recDte.UnsignedVal != qValDte.UnsignedVal, nil
   384  		case LessThan:
   385  			//todo rec is unsigned but if qVal is signed and is negative num we need to handle that case
   386  			return recDte.UnsignedVal < qValDte.UnsignedVal, nil
   387  		case LessThanOrEqualTo:
   388  			return recDte.UnsignedVal <= qValDte.UnsignedVal, nil
   389  		case GreaterThan:
   390  			return recDte.UnsignedVal > qValDte.UnsignedVal, nil
   391  		case GreaterThanOrEqualTo:
   392  			return recDte.UnsignedVal >= qValDte.UnsignedVal, nil
   393  		}
   394  	case SS_DT_SIGNED_NUM:
   395  		switch op {
   396  		case Equals:
   397  			return recDte.SignedVal == qValDte.SignedVal, nil
   398  		case NotEquals:
   399  			return recDte.SignedVal != qValDte.SignedVal, nil
   400  		case LessThan:
   401  			return recDte.SignedVal < qValDte.SignedVal, nil
   402  		case LessThanOrEqualTo:
   403  			return recDte.SignedVal <= qValDte.SignedVal, nil
   404  		case GreaterThan:
   405  			return recDte.SignedVal > qValDte.SignedVal, nil
   406  		case GreaterThanOrEqualTo:
   407  			return recDte.SignedVal >= qValDte.SignedVal, nil
   408  		}
   409  	}
   410  	log.Errorf("CompareNumbers: unknown op=%v or recDte=%v, qValDte=%v", op, recDte, qValDte)
   411  	return false, errors.New("unknown op or dtype")
   412  }