github.com/v2fly/tools@v0.100.0/internal/lsp/fuzzy/input.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package fuzzy 6 7 import ( 8 "unicode" 9 ) 10 11 // RuneRole specifies the role of a rune in the context of an input. 12 type RuneRole byte 13 14 const ( 15 // RNone specifies a rune without any role in the input (i.e., whitespace/non-ASCII). 16 RNone RuneRole = iota 17 // RSep specifies a rune with the role of segment separator. 18 RSep 19 // RTail specifies a rune which is a lower-case tail in a word in the input. 20 RTail 21 // RUCTail specifies a rune which is an upper-case tail in a word in the input. 22 RUCTail 23 // RHead specifies a rune which is the first character in a word in the input. 24 RHead 25 ) 26 27 // RuneRoles detects the roles of each byte rune in an input string and stores it in the output 28 // slice. The rune role depends on the input type. Stops when it parsed all the runes in the string 29 // or when it filled the output. If output is nil, then it gets created. 30 func RuneRoles(str string, reuse []RuneRole) []RuneRole { 31 var output []RuneRole 32 if cap(reuse) < len(str) { 33 output = make([]RuneRole, 0, len(str)) 34 } else { 35 output = reuse[:0] 36 } 37 38 prev, prev2 := rtNone, rtNone 39 for i := 0; i < len(str); i++ { 40 r := rune(str[i]) 41 42 role := RNone 43 44 curr := rtLower 45 if str[i] <= unicode.MaxASCII { 46 curr = runeType(rt[str[i]] - '0') 47 } 48 49 if curr == rtLower { 50 if prev == rtNone || prev == rtPunct { 51 role = RHead 52 } else { 53 role = RTail 54 } 55 } else if curr == rtUpper { 56 role = RHead 57 58 if prev == rtUpper { 59 // This and previous characters are both upper case. 60 61 if i+1 == len(str) { 62 // This is last character, previous was also uppercase -> this is UCTail 63 // i.e., (current char is C): aBC / BC / ABC 64 role = RUCTail 65 } 66 } 67 } else if curr == rtPunct { 68 switch r { 69 case '.', ':': 70 role = RSep 71 } 72 } 73 if curr != rtLower { 74 if i > 1 && output[i-1] == RHead && prev2 == rtUpper && (output[i-2] == RHead || output[i-2] == RUCTail) { 75 // The previous two characters were uppercase. The current one is not a lower case, so the 76 // previous one can't be a HEAD. Make it a UCTail. 77 // i.e., (last char is current char - B must be a UCTail): ABC / ZABC / AB. 78 output[i-1] = RUCTail 79 } 80 } 81 82 output = append(output, role) 83 prev2 = prev 84 prev = curr 85 } 86 return output 87 } 88 89 type runeType byte 90 91 const ( 92 rtNone runeType = iota 93 rtPunct 94 rtLower 95 rtUpper 96 ) 97 98 const rt = "00000000000000000000000000000000000000000000001122222222221000000333333333333333333333333330000002222222222222222222222222200000" 99 100 // LastSegment returns the substring representing the last segment from the input, where each 101 // byte has an associated RuneRole in the roles slice. This makes sense only for inputs of Symbol 102 // or Filename type. 103 func LastSegment(input string, roles []RuneRole) string { 104 // Exclude ending separators. 105 end := len(input) - 1 106 for end >= 0 && roles[end] == RSep { 107 end-- 108 } 109 if end < 0 { 110 return "" 111 } 112 113 start := end - 1 114 for start >= 0 && roles[start] != RSep { 115 start-- 116 } 117 118 return input[start+1 : end+1] 119 } 120 121 // ToLower transforms the input string to lower case, which is stored in the output byte slice. 122 // The lower casing considers only ASCII values - non ASCII values are left unmodified. 123 // Stops when parsed all input or when it filled the output slice. If output is nil, then it gets 124 // created. 125 func ToLower(input string, reuse []byte) []byte { 126 output := reuse 127 if cap(reuse) < len(input) { 128 output = make([]byte, len(input)) 129 } 130 131 for i := 0; i < len(input); i++ { 132 r := rune(input[i]) 133 if r <= unicode.MaxASCII { 134 if 'A' <= r && r <= 'Z' { 135 r += 'a' - 'A' 136 } 137 } 138 output[i] = byte(r) 139 } 140 return output[:len(input)] 141 } 142 143 // WordConsumer defines a consumer for a word delimited by the [start,end) byte offsets in an input 144 // (start is inclusive, end is exclusive). 145 type WordConsumer func(start, end int) 146 147 // Words find word delimiters in an input based on its bytes' mappings to rune roles. The offset 148 // delimiters for each word are fed to the provided consumer function. 149 func Words(roles []RuneRole, consume WordConsumer) { 150 var wordStart int 151 for i, r := range roles { 152 switch r { 153 case RUCTail, RTail: 154 case RHead, RNone, RSep: 155 if i != wordStart { 156 consume(wordStart, i) 157 } 158 wordStart = i 159 if r != RHead { 160 // Skip this character. 161 wordStart = i + 1 162 } 163 } 164 } 165 if wordStart != len(roles) { 166 consume(wordStart, len(roles)) 167 } 168 }