github.com/matrixorigin/matrixone@v0.7.0/pkg/vectorize/instr/instr.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package instr 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/container/nulls" 19 "strings" 20 "unicode" 21 ) 22 23 func isASCII(s string) bool { 24 for i := 0; i < len(s); i++ { 25 if s[i] > unicode.MaxASCII { 26 return false 27 } 28 } 29 return true 30 } 31 32 func kmp(r1, r2 []rune) int64 { 33 next := make([]int, len(r2)) 34 next[0] = -1 35 for i, j := 0, -1; i < len(r2)-1; { 36 if j == -1 || r2[i] == r2[j] { 37 i++ 38 j++ 39 next[i] = j 40 } else { 41 j = next[j] 42 } 43 } 44 for i, j := 0, 0; i < len(r1); { 45 if j == -1 || r1[i] == r2[j] { 46 i++ 47 j++ 48 } else { 49 j = next[j] 50 } 51 if j == len(r2) { 52 return int64(i - j + 1) 53 } 54 } 55 return 0 56 } 57 58 func Single(str string, substr string) int64 { 59 if len(substr) == 0 { 60 return 1 61 } 62 if isASCII(str) { 63 if !isASCII(substr) { 64 return 0 65 } 66 return int64(strings.Index(str, substr) + 1) 67 } 68 r1, r2 := []rune(str), []rune(substr) 69 return kmp(r1, r2) 70 } 71 72 func Instr(s1, s2 []string, snsp []*nulls.Nulls, rs []int64, nsp *nulls.Nulls) { 73 s1GoOn, s2GoOn := len(s1) > 1, len(s2) > 1 74 if s1GoOn && s2GoOn { 75 instr3(s1, s2, snsp, rs, nsp) 76 } else if s1GoOn { 77 instr1(s1, s2, snsp, rs, nsp) 78 } else { 79 instr2(s1, s2, snsp, rs, nsp) 80 } 81 } 82 83 func instr1(s1, s2 []string, snsp []*nulls.Nulls, rs []int64, nsp *nulls.Nulls) { 84 substr := s2[0] 85 for i, str := range s1 { 86 if snsp[0].Contains(uint64(i)) { 87 nsp.Set(uint64(i)) 88 continue 89 } 90 rs[i] = Single(str, substr) 91 } 92 } 93 func instr2(s1, s2 []string, snsp []*nulls.Nulls, rs []int64, nsp *nulls.Nulls) { 94 str := s1[0] 95 for i, substr := range s2 { 96 if snsp[1].Contains(uint64(i)) { 97 nsp.Set(uint64(i)) 98 continue 99 } 100 rs[i] = Single(str, substr) 101 } 102 } 103 104 func instr3(s1, s2 []string, snsp []*nulls.Nulls, rs []int64, nsp *nulls.Nulls) { 105 for i, str := range s1 { 106 if snsp[0].Contains(uint64(i)) || snsp[1].Contains(uint64(i)) { 107 nsp.Set(uint64(i)) 108 continue 109 } 110 rs[i] = Single(str, s2[i]) 111 } 112 }