github.com/egonelbre/exp@v0.0.0-20240430123955-ed1d3aa93911/characters/main.go (about)

     1  package main
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  
     7  	"github.com/apaxa-go/helper/unicodeh/boundaryh"
     8  	"golang.org/x/text/unicode/norm"
     9  )
    10  
    11  func ByteCount(s string) int {
    12  	return len(s)
    13  }
    14  
    15  func RuneCount(s string) int {
    16  	return len(([]rune)(s))
    17  }
    18  
    19  var rxCount = regexp.MustCompile(`\PM\pM*|.`)
    20  
    21  func RegexCount(s string) int {
    22  	return len(rxCount.FindAllString(s, -1))
    23  }
    24  
    25  func NormalizedCount(form norm.Form, s string) int {
    26  	var i norm.Iter
    27  	i.InitString(form, s)
    28  	var count int
    29  	for !i.Done() {
    30  		i.Next()
    31  		count++
    32  	}
    33  	return count
    34  }
    35  
    36  func NFCCount(s string) int  { return NormalizedCount(norm.NFC, s) }
    37  func NFDCount(s string) int  { return NormalizedCount(norm.NFD, s) }
    38  func NFKCCount(s string) int { return NormalizedCount(norm.NFKC, s) }
    39  func NFKDCount(s string) int { return NormalizedCount(norm.NFKD, s) }
    40  
    41  func GraphemeCount(s string) int {
    42  	return len(boundaryh.GraphemeClusterBreaksInString(s)) - 1
    43  }
    44  
// examples lists the sample strings that main runs every counting function
// over: plain ASCII, CJK text, accented and underlined Latin text, ligatures,
// and other composed symbols, so the differences between the counting
// strategies show up side by side.
var examples = []string{
	"hello",
	"你好",
	"hĕllŏ",
	"l̲i̲n̲e̲",
	"fi",
	"ffi",
	"㈎",
	"ẛ̣",
}
    55  
    56  func main() {
    57  	fmt.Printf("bytes\trunes\tNFC\tNFD\tNFKC\tNFKD\tRegex\tGraph..\tText\n")
    58  	for _, example := range examples {
    59  		fmt.Printf("%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\t%q\n",
    60  			ByteCount(example),
    61  			RuneCount(example),
    62  			NFCCount(example),
    63  			NFDCount(example),
    64  			NFKCCount(example),
    65  			NFKDCount(example),
    66  			RegexCount(example),
    67  			GraphemeCount(example),
    68  			example,
    69  		)
    70  	}
    71  }