github.com/lbryio/lbcd@v0.22.119/claimtrie/normalization/normalizer_test.go (about) 1 package normalization 2 3 import ( 4 "bufio" 5 "bytes" 6 _ "embed" 7 "math/rand" 8 "strconv" 9 "strings" 10 "testing" 11 12 "github.com/stretchr/testify/require" 13 ) 14 15 func TestNormalizationGo(t *testing.T) { 16 testNormalization(t, normalizeGo) 17 } 18 19 func testNormalization(t *testing.T, normalize func(value []byte) []byte) { 20 21 r := require.New(t) 22 23 r.Equal("test", string(normalize([]byte("TESt")))) 24 r.Equal("test 23", string(normalize([]byte("tesT 23")))) 25 r.Equal("\xFF", string(normalize([]byte("\xFF")))) 26 r.Equal("\xC3\x28", string(normalize([]byte("\xC3\x28")))) 27 r.Equal("\xCF\x89", string(normalize([]byte("\xE2\x84\xA6")))) 28 r.Equal("\xD1\x84", string(normalize([]byte("\xD0\xA4")))) 29 r.Equal("\xD5\xA2", string(normalize([]byte("\xD4\xB2")))) 30 r.Equal("\xE3\x81\xB5\xE3\x82\x99", string(normalize([]byte("\xE3\x81\xB6")))) 31 r.Equal("\xE1\x84\x81\xE1\x85\xAA\xE1\x86\xB0", string(normalize([]byte("\xEA\xBD\x91")))) 32 } 33 34 func randSeq(n int) []byte { 35 var alphabet = []rune("abcdefghijklmnopqrstuvwxyz̃ABCDEFGHIJKLMNOPQRSTUVWXYZ̃") 36 37 b := make([]rune, n) 38 for i := range b { 39 b[i] = alphabet[rand.Intn(len(alphabet))] 40 } 41 return []byte(string(b)) 42 } 43 44 func BenchmarkNormalize(b *testing.B) { 45 benchmarkNormalize(b, normalizeGo) 46 } 47 48 func benchmarkNormalize(b *testing.B, normalize func(value []byte) []byte) { 49 rand.Seed(42) 50 strings := make([][]byte, b.N) 51 for i := 0; i < b.N; i++ { 52 strings[i] = randSeq(32) 53 } 54 b.ResetTimer() 55 for i := 0; i < b.N; i++ { 56 s := normalize(strings[i]) 57 require.True(b, len(s) >= 8) 58 } 59 } 60 61 //go:embed NormalizationTest_v11.txt 62 var nfdTests string 63 64 func TestDecomposition(t *testing.T) { 65 r := require.New(t) 66 67 scanner := bufio.NewScanner(strings.NewReader(nfdTests)) 68 for scanner.Scan() { 69 line := scanner.Text() 70 if len(line) <= 0 || line[0] == '@' || line[0] == '#' { 71 continue 72 } 73 splits := strings.Split(line, ";") 74 source := convertToBytes(splits[0]) 75 targetNFD := convertToBytes(splits[2]) 76 fixed := decompose(source) 77 r.True(bytes.Equal(targetNFD, fixed), "Failed on %s -> %s. Got %U, not %U", splits[0], splits[2], fixed, targetNFD) 78 } 79 } 80 81 func convertToBytes(s string) []byte { 82 splits := strings.Split(s, " ") 83 var b bytes.Buffer 84 for i := range splits { 85 value, _ := strconv.ParseUint(splits[i], 16, len(splits[i])*4) 86 b.WriteRune(rune(value)) 87 } 88 return b.Bytes() 89 }