github.com/machinefi/w3bstream@v1.6.5-rc9.0.20240426031326-b8c7c4876e72/pkg/depends/x/stringsx/split_to_words.go (about)

     1  package stringsx
     2  
     3  import (
     4  	"unicode"
     5  	"unicode/utf8"
     6  )
     7  
     8  type LetterType int
     9  
    10  const (
    11  	LowerLetter LetterType = iota + 1
    12  	UpperLetter
    13  	Digit
    14  	Other
    15  )
    16  
    17  func CheckLetterType(c rune) LetterType {
    18  	switch {
    19  	case unicode.IsLower(c):
    20  		return LowerLetter
    21  	case unicode.IsUpper(c):
    22  		return UpperLetter
    23  	case unicode.IsDigit(c):
    24  		return Digit
    25  	default:
    26  		return Other
    27  	}
    28  }
    29  
    30  func (t LetterType) IsLetter() bool { return t <= UpperLetter }
    31  func (t LetterType) IsDigit() bool  { return t == Digit }
    32  func (t LetterType) IsOther() bool  { return t == Other }
    33  
    34  func SplitToWords(s string) []string {
    35  	if !utf8.ValidString(s) {
    36  		return []string{s}
    37  	}
    38  
    39  	words := make([]string, 0)
    40  	runes := [][]rune{{rune(s[0])}}
    41  	index := 1 // index for runes
    42  
    43  	for i := 1; i < len(s); i++ {
    44  		prev := CheckLetterType(runes[index-1][0])
    45  		curr := CheckLetterType(rune(s[i]))
    46  		if prev == curr {
    47  			runes[index-1] = append(runes[index-1], rune(s[i]))
    48  		} else {
    49  			runes = append(runes, []rune{rune(s[i])})
    50  			index++
    51  		}
    52  	}
    53  
    54  	for i := 0; i < len(runes)-1; i++ {
    55  		curr := CheckLetterType(runes[i][0])
    56  		next := CheckLetterType(runes[i+1][0])
    57  		if curr == UpperLetter && next == LowerLetter {
    58  			runes[i+1] = append([]rune{runes[i][len(runes[i])-1]}, runes[i+1]...)
    59  			runes[i] = runes[i][:len(runes[i])-1]
    60  		}
    61  	}
    62  
    63  	for _, word := range runes {
    64  		if len(word) == 0 {
    65  			continue
    66  		}
    67  		kind := CheckLetterType(word[0])
    68  		if kind == Other {
    69  			continue
    70  		}
    71  		words = append(words, string(word))
    72  	}
    73  
    74  	return words
    75  }