github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/rawchecker.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package writer 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 24 . "github.com/siglens/siglens/pkg/segment/structs" 25 . "github.com/siglens/siglens/pkg/segment/utils" 26 27 dtu "github.com/siglens/siglens/pkg/common/dtypeutils" 28 "github.com/siglens/siglens/pkg/utils" 29 30 log "github.com/sirupsen/logrus" 31 ) 32 33 func ApplySearchToMatchFilterRawCsg(match *MatchFilter, col []byte) (bool, error) { 34 35 if len(match.MatchWords) == 0 { 36 return true, nil 37 } 38 39 if len(col) == 0 { 40 return false, errors.New("column does not exist") 41 } 42 43 if col[0] != VALTYPE_ENC_SMALL_STRING[0] { 44 return false, nil 45 } 46 47 idx := uint16(1) // for encoding type 48 // next 2 bytes tell us the len of column 49 clen := utils.BytesToUint16LittleEndian(col[idx : idx+COL_OFF_BYTE_SIZE]) 50 idx += COL_OFF_BYTE_SIZE 51 52 // todo MatchWords struct can store bytes 53 if match.MatchOperator == And { 54 var foundQword bool = true 55 if match.MatchType == MATCH_PHRASE { 56 if match.Regexp != nil { 57 foundQword = match.Regexp.Match(col[idx : idx+clen]) 58 } else { 59 foundQword = utils.IsSubWordPresent(col[idx:idx+clen], match.MatchPhrase) 60 } 61 } else { 62 for _, qword := range match.MatchWords { 63 foundQword = utils.IsSubWordPresent(col[idx:idx+clen], []byte(qword)) 64 if !foundQword { 65 break 66 } 67 } 68 } 69 return foundQword, nil 70 } 71 72 if match.MatchOperator == Or { 73 var foundQword bool 74 for _, qword := range match.MatchWords { 75 foundQword = utils.IsSubWordPresent(col[idx:idx+clen], []byte(qword)) 76 if foundQword { 77 return true, nil 78 } 79 } 80 return false, nil 81 } 82 83 return false, nil 84 } 85 86 func ApplySearchToDictArrayFilter(col []byte, qValDte *DtypeEnclosure, rec []byte, fop FilterOperator, isRegexSearch bool, 87 holderDte *DtypeEnclosure) (bool, error) { 88 if qValDte == nil { 89 return false, nil 90 } 91 92 if len(rec) == 0 || rec[0] != VALTYPE_DICT_ARRAY[0] { 93 return false, nil 94 } else if rec[0] == VALTYPE_DICT_ARRAY[0] { 95 //loop over the dict arrray till we reach the end 96 totalLen := utils.BytesToInt16LittleEndian(rec[1:]) 97 idx := uint16(3) 98 var keyEquals, valEquals bool 99 var err error 100 for idx < uint16(totalLen) { 101 strlen := utils.BytesToUint16LittleEndian(rec[idx : idx+2]) 102 idx += 2 103 if int(strlen) == len(col) { 104 keyEquals = bytes.Equal(rec[idx:idx+strlen], col) 105 } 106 idx += strlen 107 if !keyEquals { 108 switch rec[idx] { 109 case VALTYPE_ENC_SMALL_STRING[0]: 110 // one byte for type & two for reclen 111 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 112 idx += 3 + strlen 113 case VALTYPE_ENC_BOOL[0]: 114 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 115 idx += 3 + strlen 116 case VALTYPE_ENC_INT64[0], VALTYPE_ENC_FLOAT64[0]: 117 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 118 idx += 3 + strlen 119 default: 120 log.Errorf("ApplySearchToDictArrayFilter:SS_DT_ARRAY_DICT unknown type=%v\n", rec[idx]) 121 return false, errors.New("invalid rec type") 122 } 123 continue 124 } 125 switch rec[idx] { 126 case VALTYPE_ENC_SMALL_STRING[0]: 127 // one byte for type & two for reclen 128 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 129 idx += 3 130 valEquals = bytes.Equal(rec[idx:idx+strlen], qValDte.StringValBytes) 131 idx += strlen 132 case VALTYPE_ENC_BOOL[0]: 133 // valEquals, err = fopOnBool(rec[idx:], qValDte, fop) 134 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 135 idx += 3 136 valEquals = bytes.Equal(rec[idx:idx+strlen], qValDte.StringValBytes) 137 idx += strlen 138 case VALTYPE_ENC_INT64[0]: 139 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 140 idx += 3 141 valEquals = bytes.Equal(rec[idx:idx+strlen], qValDte.StringValBytes) 142 idx += strlen 143 case VALTYPE_ENC_FLOAT64[0]: 144 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 145 idx += 3 146 valEquals = bytes.Equal(rec[idx:idx+strlen], qValDte.StringValBytes) 147 idx += strlen 148 default: 149 log.Errorf("ApplySearchToDictArrayFilter:SS_DT_ARRAY_DICT unknown type=%v\n", rec[idx]) 150 return false, errors.New("invalid rec type") 151 } 152 if keyEquals && valEquals { 153 return true, nil 154 } 155 } 156 return keyEquals && valEquals, err 157 } 158 return false, nil 159 } 160 161 func ApplySearchToExpressionFilterSimpleCsg(qValDte *DtypeEnclosure, fop FilterOperator, 162 col []byte, isRegexSearch bool, holderDte *DtypeEnclosure) (bool, error) { 163 164 holderDte.Reset() 165 166 return filterOpOnDataType(col, qValDte, fop, isRegexSearch, holderDte) 167 } 168 169 func isValTypeEncANumber(valTypeEnc byte) bool { 170 switch valTypeEnc { 171 case VALTYPE_ENC_INT8[0], VALTYPE_ENC_INT16[0], VALTYPE_ENC_INT32[0], VALTYPE_ENC_INT64[0], 172 VALTYPE_ENC_UINT8[0], VALTYPE_ENC_UINT16[0], VALTYPE_ENC_UINT32[0], VALTYPE_ENC_UINT64[0], 173 VALTYPE_ENC_FLOAT64[0]: 174 return true 175 } 176 return false 177 } 178 179 func filterOpOnDataType(rec []byte, qValDte *DtypeEnclosure, fop FilterOperator, 180 isRegexSearch bool, recDte *DtypeEnclosure) (bool, error) { 181 182 if qValDte == nil { 183 return false, nil 184 } 185 switch qValDte.Dtype { 186 case SS_DT_STRING: 187 if len(rec) == 0 || rec[0] != VALTYPE_ENC_SMALL_STRING[0] { 188 // if we are doing a regex search on a number, we need to convert the number to string 189 if isRegexSearch && isValTypeEncANumber(rec[0]) { 190 return filterOpOnRecNumberEncType(rec, qValDte, fop, isRegexSearch, recDte) 191 } 192 return false, nil 193 } 194 return fopOnString(rec, qValDte, fop, isRegexSearch) 195 case SS_DT_BOOL: 196 if len(rec) == 0 || rec[0] != VALTYPE_ENC_BOOL[0] { 197 return false, nil 198 } 199 return fopOnBool(rec, qValDte, fop) 200 case SS_DT_SIGNED_NUM, SS_DT_UNSIGNED_NUM, SS_DT_FLOAT: 201 return fopOnNumber(rec, qValDte, recDte, fop) 202 case SS_DT_BACKFILL: 203 return false, nil 204 default: 205 return false, errors.New("filterOpOnDataType:could not complete op") 206 } 207 } 208 209 func filterOpOnRecNumberEncType(rec []byte, qValDte *DtypeEnclosure, fop FilterOperator, 210 isRegexSearch bool, recDte *DtypeEnclosure) (bool, error) { 211 212 if qValDte == nil || !isRegexSearch { 213 return false, nil 214 } 215 216 validNumberType, err := getNumberRecDte(rec, recDte) 217 if !validNumberType { 218 return false, err 219 } 220 221 regexp := qValDte.GetRegexp() 222 if regexp == nil { 223 return false, errors.New("qValDte had nil regexp compilation") 224 } 225 226 var recValString string 227 228 if recDte.Dtype == SS_DT_FLOAT { 229 recValString = fmt.Sprintf("%f", recDte.FloatVal) 230 } else if recDte.Dtype == SS_DT_UNSIGNED_NUM { 231 recValString = fmt.Sprintf("%d", recDte.UnsignedVal) 232 } else if recDte.Dtype == SS_DT_SIGNED_NUM { 233 recValString = fmt.Sprintf("%d", recDte.SignedVal) 234 } else { 235 return false, errors.New("filterOpOnRecNumberEncType: unknown dtype") 236 } 237 238 if fop == Equals { 239 return regexp.Match([]byte(recValString)), nil 240 } else if fop == NotEquals { 241 return !regexp.Match([]byte(recValString)), nil 242 } else { 243 return false, nil 244 } 245 246 } 247 248 func fopOnString(rec []byte, qValDte *DtypeEnclosure, fop FilterOperator, 249 isRegexSearch bool) (bool, error) { 250 251 var sOff uint16 = 3 252 switch fop { 253 case Equals: 254 if isRegexSearch { 255 regexp := qValDte.GetRegexp() 256 if regexp == nil { 257 return false, errors.New("qValDte had nil regexp compilation") 258 } 259 return regexp.Match(rec[sOff:]), nil 260 } 261 if len(rec[sOff:]) != len(qValDte.StringVal) { 262 return false, nil 263 } 264 return bytes.Equal(rec[sOff:], qValDte.StringValBytes), nil 265 case NotEquals: 266 if isRegexSearch { 267 regexp := qValDte.GetRegexp() 268 if regexp == nil { 269 return false, errors.New("qValDte had nil regexp compilation") 270 } 271 return !regexp.Match(rec[sOff:]), nil 272 } 273 return !bytes.Equal(rec[sOff:], qValDte.StringValBytes), nil 274 } 275 return false, nil 276 } 277 278 func fopOnBool(rec []byte, qValDte *DtypeEnclosure, fop FilterOperator) (bool, error) { 279 280 switch fop { 281 case Equals: 282 return rec[1] == qValDte.BoolVal, nil 283 case NotEquals: 284 return rec[1] != qValDte.BoolVal, nil 285 } 286 return false, nil 287 } 288 289 func getNumberRecDte(rec []byte, recDte *DtypeEnclosure) (bool, error) { 290 if len(rec) == 0 { 291 return false, nil 292 } 293 // first find recDte's Dtype and typecast it 294 switch rec[0] { 295 case VALTYPE_ENC_BACKFILL[0]: 296 return false, nil 297 case VALTYPE_ENC_BOOL[0]: 298 return false, nil 299 case VALTYPE_ENC_SMALL_STRING[0]: 300 return false, nil 301 case VALTYPE_ENC_INT8[0]: 302 recDte.Dtype = SS_DT_SIGNED_NUM 303 recDte.SignedVal = int64(rec[1]) 304 case VALTYPE_ENC_INT16[0]: 305 recDte.Dtype = SS_DT_SIGNED_NUM 306 recDte.SignedVal = int64(utils.BytesToInt16LittleEndian(rec[1:3])) 307 case VALTYPE_ENC_INT32[0]: 308 recDte.Dtype = SS_DT_SIGNED_NUM 309 recDte.SignedVal = int64(utils.BytesToInt32LittleEndian(rec[1:5])) 310 case VALTYPE_ENC_INT64[0]: 311 recDte.Dtype = SS_DT_SIGNED_NUM 312 recDte.SignedVal = utils.BytesToInt64LittleEndian(rec[1:9]) 313 case VALTYPE_ENC_UINT8[0]: 314 recDte.Dtype = SS_DT_UNSIGNED_NUM 315 recDte.UnsignedVal = uint64(rec[1]) 316 case VALTYPE_ENC_UINT16[0]: 317 recDte.Dtype = SS_DT_UNSIGNED_NUM 318 recDte.UnsignedVal = uint64(utils.BytesToUint16LittleEndian(rec[1:3])) 319 case VALTYPE_ENC_UINT32[0]: 320 recDte.Dtype = SS_DT_UNSIGNED_NUM 321 recDte.UnsignedVal = uint64(utils.BytesToUint32LittleEndian(rec[1:5])) 322 case VALTYPE_ENC_UINT64[0]: 323 recDte.Dtype = SS_DT_UNSIGNED_NUM 324 recDte.UnsignedVal = utils.BytesToUint64LittleEndian(rec[1:9]) 325 case VALTYPE_ENC_FLOAT64[0]: 326 recDte.Dtype = SS_DT_FLOAT 327 recDte.FloatVal = utils.BytesToFloat64LittleEndian(rec[1:9]) 328 case VALTYPE_DICT_ARRAY[0], VALTYPE_RAW_JSON[0]: 329 return false, nil 330 default: 331 log.Errorf("fopOnNumber: dont know how to convert type=%v", rec[0]) 332 return false, errors.New("fopOnNumber: invalid rec type") 333 } 334 return true, nil 335 } 336 337 func fopOnNumber(rec []byte, qValDte *DtypeEnclosure, 338 recDte *DtypeEnclosure, op FilterOperator) (bool, error) { 339 340 validNumberType, err := getNumberRecDte(rec, recDte) 341 if !validNumberType { 342 return false, err 343 } 344 345 // now create a float (highest level for rec, only if we need to based on query 346 if qValDte.Dtype == SS_DT_FLOAT && recDte.Dtype != SS_DT_FLOAT { 347 // todo need to check err 348 recDte.FloatVal, _ = dtu.ConvertToFloat(recDte.UnsignedVal, 64) 349 } 350 351 return compareNumberDte(recDte, qValDte, op) 352 353 } 354 355 /* 356 We never convert any qValDte params, caller's responsibility to store 357 all possible values in a heierarichal order. 358 We will only convert the recDte (stored val) to appropriate formats as needed 359 */ 360 func compareNumberDte(recDte *DtypeEnclosure, qValDte *DtypeEnclosure, op FilterOperator) (bool, error) { 361 362 switch recDte.Dtype { 363 case SS_DT_FLOAT: 364 switch op { 365 case Equals: 366 return dtu.AlmostEquals(recDte.FloatVal, qValDte.FloatVal), nil 367 case NotEquals: 368 return !dtu.AlmostEquals(recDte.FloatVal, qValDte.FloatVal), nil 369 case LessThan: 370 return recDte.FloatVal < qValDte.FloatVal, nil 371 case LessThanOrEqualTo: 372 return recDte.FloatVal <= qValDte.FloatVal, nil 373 case GreaterThan: 374 return recDte.FloatVal > qValDte.FloatVal, nil 375 case GreaterThanOrEqualTo: 376 return recDte.FloatVal >= qValDte.FloatVal, nil 377 } 378 case SS_DT_UNSIGNED_NUM: 379 switch op { 380 case Equals: 381 return recDte.UnsignedVal == qValDte.UnsignedVal, nil 382 case NotEquals: 383 return recDte.UnsignedVal != qValDte.UnsignedVal, nil 384 case LessThan: 385 //todo rec is unsigned but if qVal is signed and is negative num we need to handle that case 386 return recDte.UnsignedVal < qValDte.UnsignedVal, nil 387 case LessThanOrEqualTo: 388 return recDte.UnsignedVal <= qValDte.UnsignedVal, nil 389 case GreaterThan: 390 return recDte.UnsignedVal > qValDte.UnsignedVal, nil 391 case GreaterThanOrEqualTo: 392 return recDte.UnsignedVal >= qValDte.UnsignedVal, nil 393 } 394 case SS_DT_SIGNED_NUM: 395 switch op { 396 case Equals: 397 return recDte.SignedVal == qValDte.SignedVal, nil 398 case NotEquals: 399 return recDte.SignedVal != qValDte.SignedVal, nil 400 case LessThan: 401 return recDte.SignedVal < qValDte.SignedVal, nil 402 case LessThanOrEqualTo: 403 return recDte.SignedVal <= qValDte.SignedVal, nil 404 case GreaterThan: 405 return recDte.SignedVal > qValDte.SignedVal, nil 406 case GreaterThanOrEqualTo: 407 return recDte.SignedVal >= qValDte.SignedVal, nil 408 } 409 } 410 log.Errorf("CompareNumbers: unknown op=%v or recDte=%v, qValDte=%v", op, recDte, qValDte) 411 return false, errors.New("unknown op or dtype") 412 }