github.com/egonelbre/exp@v0.0.0-20240430123955-ed1d3aa93911/characters/main.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "regexp" 6 7 "github.com/apaxa-go/helper/unicodeh/boundaryh" 8 "golang.org/x/text/unicode/norm" 9 ) 10 11 func ByteCount(s string) int { 12 return len(s) 13 } 14 15 func RuneCount(s string) int { 16 return len(([]rune)(s)) 17 } 18 19 var rxCount = regexp.MustCompile(`\PM\pM*|.`) 20 21 func RegexCount(s string) int { 22 return len(rxCount.FindAllString(s, -1)) 23 } 24 25 func NormalizedCount(form norm.Form, s string) int { 26 var i norm.Iter 27 i.InitString(form, s) 28 var count int 29 for !i.Done() { 30 i.Next() 31 count++ 32 } 33 return count 34 } 35 36 func NFCCount(s string) int { return NormalizedCount(norm.NFC, s) } 37 func NFDCount(s string) int { return NormalizedCount(norm.NFD, s) } 38 func NFKCCount(s string) int { return NormalizedCount(norm.NFKC, s) } 39 func NFKDCount(s string) int { return NormalizedCount(norm.NFKD, s) } 40 41 func GraphemeCount(s string) int { 42 return len(boundaryh.GraphemeClusterBreaksInString(s)) - 1 43 } 44 45 var examples = []string{ 46 "hello", 47 "你好", 48 "hĕllŏ", 49 "l̲i̲n̲e̲", 50 "fi", 51 "ffi", 52 "㈎", 53 "ẛ̣", 54 } 55 56 func main() { 57 fmt.Printf("bytes\trunes\tNFC\tNFD\tNFKC\tNFKD\tRegex\tGraph..\tText\n") 58 for _, example := range examples { 59 fmt.Printf("%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\t%q\n", 60 ByteCount(example), 61 RuneCount(example), 62 NFCCount(example), 63 NFDCount(example), 64 NFKCCount(example), 65 NFKDCount(example), 66 RegexCount(example), 67 GraphemeCount(example), 68 example, 69 ) 70 } 71 }