github.com/matrixorigin/matrixone@v0.7.0/pkg/vectorize/instr/instr.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package instr
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    19  	"strings"
    20  	"unicode"
    21  )
    22  
    23  func isASCII(s string) bool {
    24  	for i := 0; i < len(s); i++ {
    25  		if s[i] > unicode.MaxASCII {
    26  			return false
    27  		}
    28  	}
    29  	return true
    30  }
    31  
    32  func kmp(r1, r2 []rune) int64 {
    33  	next := make([]int, len(r2))
    34  	next[0] = -1
    35  	for i, j := 0, -1; i < len(r2)-1; {
    36  		if j == -1 || r2[i] == r2[j] {
    37  			i++
    38  			j++
    39  			next[i] = j
    40  		} else {
    41  			j = next[j]
    42  		}
    43  	}
    44  	for i, j := 0, 0; i < len(r1); {
    45  		if j == -1 || r1[i] == r2[j] {
    46  			i++
    47  			j++
    48  		} else {
    49  			j = next[j]
    50  		}
    51  		if j == len(r2) {
    52  			return int64(i - j + 1)
    53  		}
    54  	}
    55  	return 0
    56  }
    57  
    58  func Single(str string, substr string) int64 {
    59  	if len(substr) == 0 {
    60  		return 1
    61  	}
    62  	if isASCII(str) {
    63  		if !isASCII(substr) {
    64  			return 0
    65  		}
    66  		return int64(strings.Index(str, substr) + 1)
    67  	}
    68  	r1, r2 := []rune(str), []rune(substr)
    69  	return kmp(r1, r2)
    70  }
    71  
    72  func Instr(s1, s2 []string, snsp []*nulls.Nulls, rs []int64, nsp *nulls.Nulls) {
    73  	s1GoOn, s2GoOn := len(s1) > 1, len(s2) > 1
    74  	if s1GoOn && s2GoOn {
    75  		instr3(s1, s2, snsp, rs, nsp)
    76  	} else if s1GoOn {
    77  		instr1(s1, s2, snsp, rs, nsp)
    78  	} else {
    79  		instr2(s1, s2, snsp, rs, nsp)
    80  	}
    81  }
    82  
    83  func instr1(s1, s2 []string, snsp []*nulls.Nulls, rs []int64, nsp *nulls.Nulls) {
    84  	substr := s2[0]
    85  	for i, str := range s1 {
    86  		if snsp[0].Contains(uint64(i)) {
    87  			nsp.Set(uint64(i))
    88  			continue
    89  		}
    90  		rs[i] = Single(str, substr)
    91  	}
    92  }
    93  func instr2(s1, s2 []string, snsp []*nulls.Nulls, rs []int64, nsp *nulls.Nulls) {
    94  	str := s1[0]
    95  	for i, substr := range s2 {
    96  		if snsp[1].Contains(uint64(i)) {
    97  			nsp.Set(uint64(i))
    98  			continue
    99  		}
   100  		rs[i] = Single(str, substr)
   101  	}
   102  }
   103  
   104  func instr3(s1, s2 []string, snsp []*nulls.Nulls, rs []int64, nsp *nulls.Nulls) {
   105  	for i, str := range s1 {
   106  		if snsp[0].Contains(uint64(i)) || snsp[1].Contains(uint64(i)) {
   107  			nsp.Set(uint64(i))
   108  			continue
   109  		}
   110  		rs[i] = Single(str, s2[i])
   111  	}
   112  }