github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/function/func_builtin_regexp.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package function 16 17 import ( 18 "bytes" 19 "fmt" 20 "regexp" 21 "unicode/utf8" 22 23 "github.com/matrixorigin/matrixone/pkg/container/nulls" 24 25 "github.com/matrixorigin/matrixone/pkg/common/moerr" 26 "github.com/matrixorigin/matrixone/pkg/common/util" 27 "github.com/matrixorigin/matrixone/pkg/container/types" 28 "github.com/matrixorigin/matrixone/pkg/container/vector" 29 "github.com/matrixorigin/matrixone/pkg/sql/plan/function/functionUtil" 30 "github.com/matrixorigin/matrixone/pkg/vm/process" 31 ) 32 33 const ( 34 DefaultEscapeChar = '\\' 35 36 mapSizeForRegexp = 100 37 ) 38 39 type opBuiltInRegexp struct { 40 regMap regexpSet 41 } 42 43 func newOpBuiltInRegexp() *opBuiltInRegexp { 44 return &opBuiltInRegexp{ 45 regMap: regexpSet{ 46 mp: make(map[string]*regexp.Regexp, mapSizeForRegexp), 47 }, 48 } 49 } 50 51 func (op *opBuiltInRegexp) likeFn(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error { 52 p1 := vector.GenerateFunctionStrParameter(parameters[0]) 53 p2 := vector.GenerateFunctionStrParameter(parameters[1]) 54 rs := vector.MustFunctionResult[bool](result) 55 56 // optimize rule for some special case. 57 if parameters[1].IsConst() { 58 canOptimize, err := optimizeRuleForLike(p1, p2, rs, length, func(i []byte) []byte { 59 return i 60 }) 61 if canOptimize { 62 return err 63 } 64 } 65 66 return opBinaryBytesBytesToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 []byte) (bool, error) { 67 return op.regMap.regularMatchForLikeOp(v2, v1) 68 }) 69 } 70 71 func (op *opBuiltInRegexp) iLikeFn(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error { 72 p1 := vector.GenerateFunctionStrParameter(parameters[0]) 73 p2 := vector.GenerateFunctionStrParameter(parameters[1]) 74 rs := vector.MustFunctionResult[bool](result) 75 76 // optimize rule for some special case. 77 if parameters[1].IsConst() { 78 canOptimize, err := optimizeRuleForLike(p1, p2, rs, length, func(i []byte) []byte { 79 return bytes.ToLower(i) 80 }) 81 if canOptimize { 82 return err 83 } 84 } 85 86 return opBinaryBytesBytesToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 []byte) (bool, error) { 87 return op.regMap.regularMatchForLikeOp(bytes.ToLower(v2), bytes.ToLower(v1)) 88 }) 89 } 90 91 func optimizeRuleForLike(p1, p2 vector.FunctionParameterWrapper[types.Varlena], rs *vector.FunctionResult[bool], length int, 92 specialFnForV func([]byte) []byte) (bool, error) { 93 pat, null := p2.GetStrValue(0) 94 if null { 95 nulls.AddRange(rs.GetResultVector().GetNulls(), 0, uint64(length)) 96 return true, nil 97 } 98 pat = specialFnForV(pat) 99 100 n := len(pat) 101 // opt rule #1: if expr is empty string, only empty string like empty string. 102 if n == 0 { 103 for i := uint64(0); i < uint64(length); i++ { 104 v1, null1 := p1.GetStrValue(i) 105 v1 = specialFnForV(v1) 106 if err := rs.Append(len(v1) == 0, null1); err != nil { 107 return true, err 108 } 109 } 110 return true, nil 111 } 112 // opt rule #2.1: anything matches % 113 if n == 1 && pat[0] == '%' { 114 for i := uint64(0); i < uint64(length); i++ { 115 _, null1 := p1.GetStrValue(i) 116 if err := rs.Append(true, null1); err != nil { 117 return true, err 118 } 119 } 120 return true, nil 121 } 122 // opt rule #2.2: single char matches _ 123 // XXX in UTF8 world, should we do single RUNE matches _? 124 if n == 1 && pat[0] == '_' { 125 for i := uint64(0); i < uint64(length); i++ { 126 v1, null1 := p1.GetStrValue(i) 127 v1 = specialFnForV(v1) 128 if err := rs.Append(len(v1) == 1, null1); err != nil { 129 return true, err 130 } 131 } 132 return true, nil 133 } 134 // opt rule #2.3: single char, no wild card, so it is a simple compare eq. 135 if n == 1 && pat[0] != '_' && pat[0] != '%' { 136 for i := uint64(0); i < uint64(length); i++ { 137 v1, null1 := p1.GetStrValue(i) 138 v1 = specialFnForV(v1) 139 if err := rs.Append(len(v1) == 1 && v1[0] == pat[0], null1); err != nil { 140 return true, err 141 } 142 } 143 return true, nil 144 } 145 146 // opt rule #3: [_%]somethingInBetween[_%] 147 if n > 1 { 148 c0, c1 := pat[0], pat[n-1] 149 if !bytes.ContainsAny(pat[1:len(pat)-1], "_%") { 150 if n > 2 && pat[n-2] == DefaultEscapeChar { 151 c1 = DefaultEscapeChar 152 } 153 switch { 154 case !(c0 == '%' || c0 == '_') && !(c1 == '%' || c1 == '_'): 155 // Rule 4.1: no wild card, so it is a simple compare eq. 156 for i := uint64(0); i < uint64(length); i++ { 157 v1, null1 := p1.GetStrValue(i) 158 v1 = specialFnForV(v1) 159 if err := rs.Append(len(v1) == n && bytes.Equal(pat, v1), null1); err != nil { 160 return true, err 161 } 162 } 163 return true, nil 164 165 case c0 == '_' && !(c1 == '%' || c1 == '_'): 166 // Rule 4.2: _foobarzoo, 167 for i := uint64(0); i < uint64(length); i++ { 168 v1, null1 := p1.GetStrValue(i) 169 v1 = specialFnForV(v1) 170 if err := rs.Append(len(v1) == n && bytes.Equal(pat[1:], v1[1:]), null1); err != nil { 171 return true, err 172 } 173 } 174 return true, nil 175 176 case c0 == '%' && !(c1 == '%' || c1 == '_'): 177 // Rule 4.3, %foobarzoo, it turns into a suffix match. 178 suffix := functionUtil.RemoveEscapeChar(pat[1:], DefaultEscapeChar) 179 for i := uint64(0); i < uint64(length); i++ { 180 v1, null1 := p1.GetStrValue(i) 181 v1 = specialFnForV(v1) 182 if err := rs.Append(bytes.HasSuffix(v1, suffix), null1); err != nil { 183 return true, err 184 } 185 } 186 return true, nil 187 188 case c1 == '_' && !(c0 == '%' || c0 == '_'): 189 // Rule 4.4, foobarzoo_, it turns into eq ingoring last char. 190 prefix := functionUtil.RemoveEscapeChar(pat[:n-1], DefaultEscapeChar) 191 for i := uint64(0); i < uint64(length); i++ { 192 v1, null1 := p1.GetStrValue(i) 193 v1 = specialFnForV(v1) 194 if err := rs.Append(len(v1) == n && bytes.Equal(prefix, v1[:n-1]), null1); err != nil { 195 return true, err 196 } 197 } 198 return true, nil 199 200 case c1 == '%' && !(c0 == '%' || c0 == '_'): 201 // Rule 4.5 foobarzoo%, prefix match 202 prefix := functionUtil.RemoveEscapeChar(pat[:n-1], DefaultEscapeChar) 203 for i := uint64(0); i < uint64(length); i++ { 204 v1, null1 := p1.GetStrValue(i) 205 v1 = specialFnForV(v1) 206 if err := rs.Append(bytes.HasPrefix(v1, prefix), null1); err != nil { 207 return true, err 208 } 209 } 210 return true, nil 211 212 case c0 == '%' && c1 == '%': 213 // Rule 4.6 %foobarzoo%, now it is contains 214 substr := functionUtil.RemoveEscapeChar(pat[1:n-1], DefaultEscapeChar) 215 for i := uint64(0); i < uint64(length); i++ { 216 v1, null1 := p1.GetStrValue(i) 217 v1 = specialFnForV(v1) 218 if err := rs.Append(bytes.Contains(v1, substr), null1); err != nil { 219 return true, err 220 } 221 } 222 return true, nil 223 224 case c0 == '%' && c1 == '_': 225 // Rule 4.7 %foobarzoo_, 226 suffix := functionUtil.RemoveEscapeChar(pat[1:n-1], DefaultEscapeChar) 227 for i := uint64(0); i < uint64(length); i++ { 228 v1, null1 := p1.GetStrValue(i) 229 v1 = specialFnForV(v1) 230 if err := rs.Append(len(v1) > 0 && bytes.HasSuffix(v1[:len(v1)-1], suffix), null1); err != nil { 231 return true, err 232 } 233 } 234 return true, nil 235 236 case c0 == '_' && c1 == '%': 237 // Rule 4.8 _foobarzoo% 238 prefix := functionUtil.RemoveEscapeChar(pat[1:n-1], DefaultEscapeChar) 239 for i := uint64(0); i < uint64(length); i++ { 240 v1, null1 := p1.GetStrValue(i) 241 v1 = specialFnForV(v1) 242 if err := rs.Append(len(v1) > 0 && bytes.HasPrefix(v1[1:], prefix), null1); err != nil { 243 return true, err 244 } 245 } 246 return true, nil 247 } 248 } else if c0 == '%' && c1 == '%' && !bytes.Contains(pat[1:len(pat)-1], []byte{'_'}) && !bytes.Contains(pat, []byte{'\\', '%'}) { 249 pat0 := pat[1:] 250 var subpats [][]byte 251 for { 252 idx := bytes.IndexByte(pat0, '%') 253 if idx == -1 { 254 break 255 } 256 subpats = append(subpats, pat0[:idx]) 257 pat0 = pat0[idx+1:] 258 } 259 260 outer: 261 for i := uint64(0); i < uint64(length); i++ { 262 v1, null1 := p1.GetStrValue(i) 263 if null1 { 264 rs.AppendMustNull() 265 } else { 266 for _, sp := range subpats { 267 idx := bytes.Index(v1, sp) 268 if idx == -1 { 269 rs.AppendMustValue(false) 270 continue outer 271 } 272 v1 = v1[idx+len(sp):] 273 } 274 rs.AppendMustValue(true) 275 } 276 } 277 return true, nil 278 } 279 } 280 return false, nil 281 } 282 283 func (op *opBuiltInRegexp) builtInRegMatch(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error { 284 return opBinaryStrStrToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 string) (bool, error) { 285 reg, err := op.regMap.getRegularMatcher(v2) 286 if err != nil { 287 return false, err 288 } 289 return reg.MatchString(v1), nil 290 }) 291 } 292 293 func (op *opBuiltInRegexp) builtInNotRegMatch(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error { 294 return opBinaryStrStrToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 string) (bool, error) { 295 reg, err := op.regMap.getRegularMatcher(v2) 296 if err != nil { 297 return false, err 298 } 299 return !reg.MatchString(v1), nil 300 }) 301 } 302 303 func (op *opBuiltInRegexp) builtInRegexpSubstr(parameters []*vector.Vector, result vector.FunctionResultWrapper, _ *process.Process, length int) error { 304 p1 := vector.GenerateFunctionStrParameter(parameters[0]) 305 p2 := vector.GenerateFunctionStrParameter(parameters[1]) 306 307 rs := vector.MustFunctionResult[types.Varlena](result) 308 switch len(parameters) { 309 case 2: 310 for i := uint64(0); i < uint64(length); i++ { 311 v1, null1 := p1.GetStrValue(i) 312 v2, null2 := p2.GetStrValue(i) 313 if null1 || null2 || len(v2) == 0 { 314 if err := rs.AppendBytes(nil, true); err != nil { 315 return err 316 } 317 } else { 318 expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2) 319 match, res, err := op.regMap.regularSubstr(pat, expr, 1, 1) 320 if err != nil { 321 return err 322 } 323 if err = rs.AppendBytes(functionUtil.QuickStrToBytes(res), !match); err != nil { 324 return err 325 } 326 } 327 } 328 329 case 3: 330 positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2]) 331 for i := uint64(0); i < uint64(length); i++ { 332 v1, null1 := p1.GetStrValue(i) 333 v2, null2 := p2.GetStrValue(i) 334 pos, null3 := positions.GetValue(i) 335 if null1 || null2 || null3 || len(v2) == 0 { 336 if err := rs.AppendBytes(nil, true); err != nil { 337 return err 338 } 339 } else { 340 expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2) 341 match, res, err := op.regMap.regularSubstr(pat, expr, pos, 1) 342 if err != nil { 343 return err 344 } 345 if err = rs.AppendBytes(functionUtil.QuickStrToBytes(res), !match); err != nil { 346 return err 347 } 348 } 349 } 350 351 case 4: 352 positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2]) 353 occurrences := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3]) 354 for i := uint64(0); i < uint64(length); i++ { 355 v1, null1 := p1.GetStrValue(i) 356 v2, null2 := p2.GetStrValue(i) 357 pos, null3 := positions.GetValue(i) 358 ocur, null4 := occurrences.GetValue(i) 359 if null1 || null2 || null3 || null4 || len(v2) == 0 { 360 if err := rs.AppendBytes(nil, true); err != nil { 361 return err 362 } 363 } else { 364 expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2) 365 match, res, err := op.regMap.regularSubstr(pat, expr, pos, ocur) 366 if err != nil { 367 return err 368 } 369 if err = rs.AppendBytes(functionUtil.QuickStrToBytes(res), !match); err != nil { 370 return err 371 } 372 } 373 } 374 return nil 375 376 } 377 return nil 378 } 379 380 func (op *opBuiltInRegexp) builtInRegexpInstr(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error { 381 p1 := vector.GenerateFunctionStrParameter(parameters[0]) 382 p2 := vector.GenerateFunctionStrParameter(parameters[1]) 383 384 rs := vector.MustFunctionResult[int64](result) 385 switch len(parameters) { 386 case 2: 387 return opBinaryStrStrToFixedWithErrorCheck[int64](parameters, result, proc, length, func(v1, v2 string) (int64, error) { 388 return op.regMap.regularInstr(v2, v1, 1, 1, 0) 389 }) 390 391 case 3: 392 positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2]) 393 for i := uint64(0); i < uint64(length); i++ { 394 v1, null1 := p1.GetStrValue(i) 395 v2, null2 := p2.GetStrValue(i) 396 pos, null3 := positions.GetValue(i) 397 if null1 || null2 || null3 { 398 if err := rs.Append(0, true); err != nil { 399 return err 400 } 401 } else { 402 expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2) 403 index, err := op.regMap.regularInstr(pat, expr, pos, 1, 0) 404 if err != nil { 405 return err 406 } 407 if err = rs.Append(index, false); err != nil { 408 return err 409 } 410 } 411 } 412 413 case 4: 414 positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2]) 415 occurrences := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3]) 416 for i := uint64(0); i < uint64(length); i++ { 417 v1, null1 := p1.GetStrValue(i) 418 v2, null2 := p2.GetStrValue(i) 419 pos, null3 := positions.GetValue(i) 420 ocur, null4 := occurrences.GetValue(i) 421 if null1 || null2 || null3 || null4 { 422 if err := rs.Append(0, true); err != nil { 423 return err 424 } 425 } else { 426 expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2) 427 index, err := op.regMap.regularInstr(pat, expr, pos, ocur, 0) 428 if err != nil { 429 return err 430 } 431 if err = rs.Append(index, false); err != nil { 432 return err 433 } 434 } 435 } 436 return nil 437 438 case 5: 439 positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2]) 440 occurrences := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3]) 441 resultOption := vector.GenerateFunctionFixedTypeParameter[int8](parameters[4]) 442 for i := uint64(0); i < uint64(length); i++ { 443 v1, null1 := p1.GetStrValue(i) 444 v2, null2 := p2.GetStrValue(i) 445 pos, null3 := positions.GetValue(i) 446 ocur, null4 := occurrences.GetValue(i) 447 resOp, null5 := resultOption.GetValue(i) 448 if null1 || null2 || null3 || null4 || null5 { 449 if err := rs.Append(0, true); err != nil { 450 return err 451 } 452 } else { 453 expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2) 454 index, err := op.regMap.regularInstr(pat, expr, pos, ocur, resOp) 455 if err != nil { 456 return err 457 } 458 if err = rs.Append(index, false); err != nil { 459 return err 460 } 461 } 462 } 463 } 464 return nil 465 } 466 467 func (op *opBuiltInRegexp) builtInRegexpLike(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error { 468 p1 := vector.GenerateFunctionStrParameter(parameters[0]) 469 p2 := vector.GenerateFunctionStrParameter(parameters[1]) 470 rs := vector.MustFunctionResult[bool](result) 471 472 if len(parameters) == 2 { 473 return opBinaryStrStrToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 string) (bool, error) { 474 match, err := op.regMap.regularLike(v2, v1, "c") 475 return match, err 476 }) 477 } else if len(parameters) == 3 { 478 if parameters[2].IsConstNull() { 479 nulls.AddRange(rs.GetResultVector().GetNulls(), 0, uint64(length)) 480 return nil 481 } 482 483 p3 := vector.GenerateFunctionStrParameter(parameters[2]) 484 for i := uint64(0); i < uint64(length); i++ { 485 expr, null1 := p1.GetStrValue(i) 486 pat, null2 := p2.GetStrValue(i) 487 mt, null3 := p3.GetStrValue(i) 488 if null1 || null2 || null3 { 489 if err := rs.Append(false, true); err != nil { 490 return err 491 } 492 } else { 493 match, err := op.regMap.regularLike(string(pat), string(expr), string(mt)) 494 if err != nil { 495 return err 496 } 497 if err = rs.Append(match, false); err != nil { 498 return err 499 } 500 } 501 } 502 } 503 return nil 504 } 505 506 func (op *opBuiltInRegexp) builtInRegexpReplace(parameters []*vector.Vector, result vector.FunctionResultWrapper, _ *process.Process, length int) error { 507 p1 := vector.GenerateFunctionStrParameter(parameters[0]) // expr 508 p2 := vector.GenerateFunctionStrParameter(parameters[1]) // pat 509 p3 := vector.GenerateFunctionStrParameter(parameters[2]) // repl 510 rs := vector.MustFunctionResult[types.Varlena](result) 511 512 if parameters[0].IsConstNull() || parameters[1].IsConstNull() || parameters[2].IsConstNull() { 513 for i := uint64(0); i < uint64(length); i++ { 514 if err := rs.AppendBytes(nil, true); err != nil { 515 return err 516 } 517 } 518 return nil 519 } 520 521 switch len(parameters) { 522 case 3: 523 for i := uint64(0); i < uint64(length); i++ { 524 v1, null1 := p1.GetStrValue(i) 525 v2, null2 := p2.GetStrValue(i) 526 v3, null3 := p3.GetStrValue(i) 527 if null1 || null2 || null3 { 528 if err := rs.AppendBytes(nil, true); err != nil { 529 return err 530 } 531 } else { 532 val, err := op.regMap.regularReplace(functionUtil.QuickBytesToStr(v2), functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v3), 1, 0) 533 if err != nil { 534 return err 535 } 536 if err = rs.AppendBytes([]byte(val), false); err != nil { 537 return err 538 } 539 } 540 } 541 542 case 4: 543 p4 := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3]) 544 for i := uint64(0); i < uint64(length); i++ { 545 v1, null1 := p1.GetStrValue(i) 546 v2, null2 := p2.GetStrValue(i) 547 v3, null3 := p3.GetStrValue(i) 548 v4, null4 := p4.GetValue(i) 549 if null1 || null2 || null3 || null4 { 550 if err := rs.AppendBytes(nil, true); err != nil { 551 return err 552 } 553 } else { 554 val, err := op.regMap.regularReplace(functionUtil.QuickBytesToStr(v2), functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v3), v4, 0) 555 if err != nil { 556 return err 557 } 558 if err = rs.AppendBytes([]byte(val), false); err != nil { 559 return err 560 } 561 } 562 } 563 564 case 5: 565 p4 := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3]) 566 p5 := vector.GenerateFunctionFixedTypeParameter[int64](parameters[4]) 567 for i := uint64(0); i < uint64(length); i++ { 568 v1, null1 := p1.GetStrValue(i) 569 v2, null2 := p2.GetStrValue(i) 570 v3, null3 := p3.GetStrValue(i) 571 v4, null4 := p4.GetValue(i) 572 v5, null5 := p5.GetValue(i) 573 if null1 || null2 || null3 || null4 || null5 { 574 if err := rs.AppendBytes(nil, true); err != nil { 575 return err 576 } 577 } else { 578 val, err := op.regMap.regularReplace(functionUtil.QuickBytesToStr(v2), functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v3), v4, v5) 579 if err != nil { 580 return err 581 } 582 if err = rs.AppendBytes([]byte(val), false); err != nil { 583 return err 584 } 585 } 586 } 587 } 588 return nil 589 } 590 591 type regexpSet struct { 592 mp map[string]*regexp.Regexp 593 } 594 595 func (rs *regexpSet) getRegularMatcher(pat string) (*regexp.Regexp, error) { 596 var err error 597 598 reg, ok := rs.mp[pat] 599 if !ok { 600 if len(rs.mp) == mapSizeForRegexp { 601 for key := range rs.mp { 602 delete(rs.mp, key) 603 break 604 } 605 } 606 607 reg, err = regexp.Compile(pat) 608 if err != nil { 609 return nil, err 610 } 611 rs.mp[pat] = reg 612 } 613 return reg, nil 614 } 615 616 func (rs *regexpSet) regularMatchForLikeOp(pat []byte, str []byte) (match bool, err error) { 617 replace := func(s string) string { 618 var oldCharactor rune 619 620 r := make([]byte, len(s)*2) 621 w := 0 622 start := 0 623 for len(s) > start { 624 character, wid := utf8.DecodeRuneInString(s[start:]) 625 if oldCharactor == '\\' { 626 w += copy(r[w:], s[start:start+wid]) 627 start += wid 628 oldCharactor = 0 629 continue 630 } 631 switch character { 632 case '_': 633 w += copy(r[w:], []byte{'.'}) 634 case '%': 635 w += copy(r[w:], []byte{'.', '*'}) 636 case '(': 637 w += copy(r[w:], []byte{'\\', '('}) 638 case ')': 639 w += copy(r[w:], []byte{'\\', ')'}) 640 case '\\': 641 default: 642 w += copy(r[w:], s[start:start+wid]) 643 } 644 start += wid 645 oldCharactor = character 646 } 647 return string(r[:w]) 648 } 649 convert := func(expr []byte) string { 650 return fmt.Sprintf("^(?s:%s)$", replace(util.UnsafeBytesToString(expr))) 651 } 652 653 realPat := convert(pat) 654 reg, err := rs.getRegularMatcher(realPat) 655 if err != nil { 656 return false, nil 657 } 658 return reg.Match(str), nil 659 } 660 661 // if str[pos:] matched pat. 662 // return Nth (N = occurrence here) of match result 663 func (rs *regexpSet) regularSubstr(pat string, str string, pos, occurrence int64) (match bool, substr string, err error) { 664 // check position 665 if pos < 1 || pos > int64(len(str)) { 666 return false, "", moerr.NewInvalidInputNoCtx("regexp_substr: Index out of bounds in regular expression search. Search start position: %d, Search string length: %d", pos, len(str)) 667 } 668 // check occurrence 669 if occurrence < 1 { 670 return false, "", moerr.NewInvalidInputNoCtx("regexp_substr have Index out of bounds in regular expression search, return occurrence %d", occurrence) 671 } 672 reg, err := rs.getRegularMatcher(pat) 673 if err != nil { 674 return false, "", err 675 } 676 677 // match and return 678 matches := reg.FindAllString(str[pos-1:], -1) 679 if l := int64(len(matches)); l < occurrence { 680 return false, "", nil 681 } 682 return true, matches[occurrence-1], nil 683 } 684 685 func (rs *regexpSet) regularReplace(pat string, str string, repl string, pos, occurrence int64) (r string, err error) { 686 // check position 687 if pos < 1 || pos > int64(len(str)) { 688 return "", moerr.NewInvalidInputNoCtx("regexp_replace: Index out of bounds in regular expression search. Search start position: %d, Search string length: %d", pos, len(str)) 689 } 690 // check occurrence 691 if occurrence < 0 { 692 return "", moerr.NewInvalidInputNoCtx("regexp_replace have Index out of bounds in regular expression search, return occurrence %d", occurrence) 693 } 694 695 reg, err := rs.getRegularMatcher(pat) 696 if err != nil { 697 pat = "[" + pat + "]" 698 return "", moerr.NewInvalidArgNoCtx("regexp_replace have invalid regexp pattern arg", pat) 699 } 700 701 //match result indexs 702 matchRes := reg.FindAllStringIndex(str, -1) 703 if matchRes == nil { 704 return str, nil 705 } //find the match position 706 index := 0 707 for int64(matchRes[index][0]) < pos-1 { 708 index++ 709 if index == len(matchRes) { 710 return str, nil 711 } 712 } 713 matchRes = matchRes[index:] 714 if int64(len(matchRes)) < occurrence { 715 return str, nil 716 } 717 if occurrence == 0 { 718 return reg.ReplaceAllLiteralString(str, repl), nil 719 } else if occurrence == int64(len(matchRes)) { 720 // the string won't be replaced 721 notRepl := str[:matchRes[occurrence-1][0]] 722 // the string will be replaced 723 replace := str[matchRes[occurrence-1][0]:] 724 return notRepl + reg.ReplaceAllLiteralString(replace, repl), nil 725 } else { 726 // the string won't be replaced 727 notRepl := str[:matchRes[occurrence-1][0]] 728 // the string will be replaced 729 replace := str[matchRes[occurrence-1][0]:matchRes[occurrence][0]] 730 left := str[matchRes[occurrence][0]:] 731 return notRepl + reg.ReplaceAllLiteralString(replace, repl) + left, nil 732 } 733 } 734 735 // regularInstr return an index indicating the starting or ending position of the match. 736 // it depends on the value of retOption, if 0 then return start, if 1 then return end. 737 // return 0 if match failed. 738 func (rs *regexpSet) regularInstr(pat string, str string, pos, occurrence int64, retOption int8) (index int64, err error) { 739 // check position 740 if pos < 1 || pos > int64(len(str)) { 741 return 0, moerr.NewInvalidInputNoCtx("regexp_instr: Index out of bounds in regular expression search. Search start position: %d, Search string length: %d", pos, len(str)) 742 } 743 // check occurrence 744 if occurrence < 1 { 745 return 0, moerr.NewInvalidInputNoCtx("regexp_instr have Index out of bounds in regular expression search, return occurrence %d", occurrence) 746 } 747 // check retOption 748 if retOption > 1 { 749 return 0, moerr.NewInvalidInputNoCtx("regexp_instr have Index out of bounds in regular expression search, return option %d", retOption) 750 } 751 752 reg, err := rs.getRegularMatcher(pat) 753 if err != nil { 754 pat = "[" + pat + "]" 755 return 0, moerr.NewInvalidArgNoCtx("regexp_instr have invalid regexp pattern arg", pat) 756 } 757 758 matches := reg.FindAllStringIndex(str[pos-1:], -1) 759 if int64(len(matches)) < occurrence { 760 return 0, nil 761 } 762 return int64(matches[occurrence-1][retOption]) + pos, nil 763 } 764 765 func (rs *regexpSet) regularLike(pat string, str string, matchType string) (bool, error) { 766 mt, err := getPureMatchType(matchType) 767 if err != nil { 768 return false, err 769 } 770 rule := fmt.Sprintf("(?%s)%s", mt, pat) 771 772 reg, err := rs.getRegularMatcher(rule) 773 if err != nil { 774 return false, err 775 } 776 777 match := reg.MatchString(str) 778 return match, nil 779 } 780 781 // Support four arguments: 782 // i: case insensitive. 783 // c: case sensitive. 784 // m: multiple line mode. 785 // n: '.' can match line terminator. 786 func getPureMatchType(input string) (string, error) { 787 retstring := "" 788 caseType := "" 789 foundn := false 790 foundm := false 791 792 for _, c := range input { 793 switch string(c) { 794 case "i": 795 caseType = "i" 796 case "c": 797 caseType = "" 798 case "m": 799 if !foundm { 800 retstring += "m" 801 foundm = true 802 } 803 case "n": 804 if !foundn { 805 retstring += "s" 806 foundn = true 807 } 808 default: 809 return "", moerr.NewInvalidInputNoCtx("regexp_like got invalid match_type input!") 810 } 811 } 812 813 retstring += caseType 814 815 return retstring, nil 816 }