github.com/camlistore/go4@v0.0.0-20200104003542-c7e774b10ea0/strutil/strutil.go (about) 1 /* 2 Copyright 2013 The Perkeep Authors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package strutil contains string and byte processing functions. 18 package strutil // import "go4.org/strutil" 19 20 import ( 21 "strings" 22 "unicode" 23 "unicode/utf8" 24 ) 25 26 // Fork of Go's implementation in pkg/strings/strings.go: 27 // Generic split: splits after each instance of sep, 28 // including sepSave bytes of sep in the subarrays. 29 func genSplit(dst []string, s, sep string, sepSave, n int) []string { 30 if n == 0 { 31 return nil 32 } 33 if sep == "" { 34 panic("sep is empty") 35 } 36 if n < 0 { 37 n = strings.Count(s, sep) + 1 38 } 39 c := sep[0] 40 start := 0 41 na := 0 42 for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { 43 if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { 44 dst = append(dst, s[start:i+sepSave]) 45 na++ 46 start = i + len(sep) 47 i += len(sep) - 1 48 } 49 } 50 dst = append(dst, s[start:]) 51 return dst 52 } 53 54 // AppendSplitN is like strings.SplitN but appends to and returns dst. 55 // Unlike strings.SplitN, an empty separator is not supported. 56 // The count n determines the number of substrings to return: 57 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 58 // n == 0: the result is nil (zero substrings) 59 // n < 0: all substrings 60 func AppendSplitN(dst []string, s, sep string, n int) []string { 61 return genSplit(dst, s, sep, 0, n) 62 } 63 64 // equalFoldRune compares a and b runes whether they fold equally. 65 // 66 // The code comes from strings.EqualFold, but shortened to only one rune. 67 func equalFoldRune(sr, tr rune) bool { 68 if sr == tr { 69 return true 70 } 71 // Make sr < tr to simplify what follows. 72 if tr < sr { 73 sr, tr = tr, sr 74 } 75 // Fast check for ASCII. 76 if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { 77 // ASCII, and sr is upper case. tr must be lower case. 78 if tr == sr+'a'-'A' { 79 return true 80 } 81 return false 82 } 83 84 // General case. SimpleFold(x) returns the next equivalent rune > x 85 // or wraps around to smaller values. 86 r := unicode.SimpleFold(sr) 87 for r != sr && r < tr { 88 r = unicode.SimpleFold(r) 89 } 90 if r == tr { 91 return true 92 } 93 return false 94 } 95 96 // HasPrefixFold is like strings.HasPrefix but uses Unicode case-folding, 97 // matching case insensitively. 98 func HasPrefixFold(s, prefix string) bool { 99 if prefix == "" { 100 return true 101 } 102 for _, pr := range prefix { 103 if s == "" { 104 return false 105 } 106 // step with s, too 107 sr, size := utf8.DecodeRuneInString(s) 108 if sr == utf8.RuneError { 109 return false 110 } 111 s = s[size:] 112 if !equalFoldRune(sr, pr) { 113 return false 114 } 115 } 116 return true 117 } 118 119 // HasSuffixFold is like strings.HasSuffix but uses Unicode case-folding, 120 // matching case insensitively. 121 func HasSuffixFold(s, suffix string) bool { 122 if suffix == "" { 123 return true 124 } 125 // count the runes and bytes in s, but only till rune count of suffix 126 bo, so := len(s), len(suffix) 127 for bo > 0 && so > 0 { 128 r, size := utf8.DecodeLastRuneInString(s[:bo]) 129 if r == utf8.RuneError { 130 return false 131 } 132 bo -= size 133 134 sr, size := utf8.DecodeLastRuneInString(suffix[:so]) 135 if sr == utf8.RuneError { 136 return false 137 } 138 so -= size 139 140 if !equalFoldRune(r, sr) { 141 return false 142 } 143 } 144 return so == 0 145 } 146 147 // ContainsFold is like strings.Contains but uses Unicode case-folding. 148 func ContainsFold(s, substr string) bool { 149 if substr == "" { 150 return true 151 } 152 if s == "" { 153 return false 154 } 155 firstRune := rune(substr[0]) 156 if firstRune >= utf8.RuneSelf { 157 firstRune, _ = utf8.DecodeRuneInString(substr) 158 } 159 for i, rune := range s { 160 if equalFoldRune(rune, firstRune) && HasPrefixFold(s[i:], substr) { 161 return true 162 } 163 } 164 return false 165 } 166 167 // IsPlausibleJSON reports whether s likely contains a JSON object, without 168 // actually parsing it. It's meant to be a light heuristic. 169 func IsPlausibleJSON(s string) bool { 170 return startsWithOpenBrace(s) && endsWithCloseBrace(s) 171 } 172 173 func isASCIIWhite(b byte) bool { return b == ' ' || b == '\n' || b == '\r' || b == '\t' } 174 175 func startsWithOpenBrace(s string) bool { 176 for len(s) > 0 { 177 switch { 178 case s[0] == '{': 179 return true 180 case isASCIIWhite(s[0]): 181 s = s[1:] 182 default: 183 return false 184 } 185 } 186 return false 187 } 188 189 func endsWithCloseBrace(s string) bool { 190 for len(s) > 0 { 191 last := len(s) - 1 192 switch { 193 case s[last] == '}': 194 return true 195 case isASCIIWhite(s[last]): 196 s = s[:last] 197 default: 198 return false 199 } 200 } 201 return false 202 }