// LetterType classifies a rune for the purpose of word splitting.
type LetterType int

// Rune classes returned by CheckLetterType. The values start at 1, so the
// zero value of LetterType does not denote any class.
const (
	// LowerLetter is a rune for which unicode.IsLower reports true.
	LowerLetter LetterType = iota + 1
	// UpperLetter is a rune for which unicode.IsUpper reports true.
	UpperLetter
	// Digit is a rune for which unicode.IsDigit reports true.
	Digit
	// Other is any rune that is neither a cased letter nor a digit.
	Other
)

// CheckLetterType reports the class of c. The checks are applied in the
// order lower case, upper case, digit; everything else is Other.
func CheckLetterType(c rune) LetterType {
	switch {
	case unicode.IsLower(c):
		return LowerLetter
	case unicode.IsUpper(c):
		return UpperLetter
	case unicode.IsDigit(c):
		return Digit
	default:
		return Other
	}
}

// IsLetter reports whether t is LowerLetter or UpperLetter. The zero value
// and any other out-of-range value are not letters.
func (t LetterType) IsLetter() bool { return t == LowerLetter || t == UpperLetter }

// IsDigit reports whether t is Digit.
func (t LetterType) IsDigit() bool { return t == Digit }

// IsOther reports whether t is Other.
func (t LetterType) IsOther() bool { return t == Other }

// SplitToWords splits s into words at every change of rune class (lower case,
// upper case, digit, other). Runs of Other runes act as separators and are
// dropped. When an upper-case run is directly followed by a lower-case run,
// the last upper-case rune is treated as the first rune of the following
// word, so "HTTPServer" yields ["HTTP", "Server"] and "HelloWorld" yields
// ["Hello", "World"].
//
// If s is not valid UTF-8 it is returned unchanged as the only element. An
// empty string, or a string without any letter or digit, yields an empty
// non-nil slice.
func SplitToWords(s string) []string {
	if !utf8.ValidString(s) {
		return []string{s}
	}

	words := make([]string, 0)
	if s == "" {
		return words
	}

	// Pass 1: group consecutive runes of the same class into runs. Ranging
	// over the string decodes whole runes, so multi-byte characters are
	// classified as a unit rather than byte by byte.
	var (
		runs [][]rune
		prev LetterType
	)
	for _, r := range s {
		curr := CheckLetterType(r)
		if len(runs) > 0 && curr == prev {
			runs[len(runs)-1] = append(runs[len(runs)-1], r)
		} else {
			runs = append(runs, []rune{r})
		}
		prev = curr
	}

	// Pass 2: an upper-case run followed by a lower-case run donates its last
	// rune to the lower-case run ("HTTPS"+"erver" -> "HTTP"+"Server"). The
	// donor may become empty ("H"+"ello" -> ""+"Hello"); empty runs are
	// skipped in pass 3. A run is only shortened after it has been inspected,
	// so runs[i][0] is always valid here.
	for i := 0; i+1 < len(runs); i++ {
		if CheckLetterType(runs[i][0]) != UpperLetter || CheckLetterType(runs[i+1][0]) != LowerLetter {
			continue
		}
		last := len(runs[i]) - 1
		runs[i+1] = append([]rune{runs[i][last]}, runs[i+1]...)
		runs[i] = runs[i][:last]
	}

	// Pass 3: keep every non-empty run that is not a separator.
	for _, run := range runs {
		if len(run) == 0 || CheckLetterType(run[0]) == Other {
			continue
		}
		words = append(words, string(run))
	}

	return words
}