github.com/lbryio/lbcd@v0.22.119/claimtrie/normalization/normalizer_test.go (about)

     1  package normalization
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	_ "embed"
     7  	"math/rand"
     8  	"strconv"
     9  	"strings"
    10  	"testing"
    11  
    12  	"github.com/stretchr/testify/require"
    13  )
    14  
// TestNormalizationGo runs the shared assertions in testNormalization against
// the normalizeGo implementation.
func TestNormalizationGo(t *testing.T) {
	testNormalization(t, normalizeGo)
}
    18  
    19  func testNormalization(t *testing.T, normalize func(value []byte) []byte) {
    20  
    21  	r := require.New(t)
    22  
    23  	r.Equal("test", string(normalize([]byte("TESt"))))
    24  	r.Equal("test 23", string(normalize([]byte("tesT 23"))))
    25  	r.Equal("\xFF", string(normalize([]byte("\xFF"))))
    26  	r.Equal("\xC3\x28", string(normalize([]byte("\xC3\x28"))))
    27  	r.Equal("\xCF\x89", string(normalize([]byte("\xE2\x84\xA6"))))
    28  	r.Equal("\xD1\x84", string(normalize([]byte("\xD0\xA4"))))
    29  	r.Equal("\xD5\xA2", string(normalize([]byte("\xD4\xB2"))))
    30  	r.Equal("\xE3\x81\xB5\xE3\x82\x99", string(normalize([]byte("\xE3\x81\xB6"))))
    31  	r.Equal("\xE1\x84\x81\xE1\x85\xAA\xE1\x86\xB0", string(normalize([]byte("\xEA\xBD\x91"))))
    32  }
    33  
    34  func randSeq(n int) []byte {
    35  	var alphabet = []rune("abcdefghijklmnopqrstuvwxyz̃ABCDEFGHIJKLMNOPQRSTUVWXYZ̃")
    36  
    37  	b := make([]rune, n)
    38  	for i := range b {
    39  		b[i] = alphabet[rand.Intn(len(alphabet))]
    40  	}
    41  	return []byte(string(b))
    42  }
    43  
// BenchmarkNormalize runs the shared benchmark body in benchmarkNormalize
// against the normalizeGo implementation.
func BenchmarkNormalize(b *testing.B) {
	benchmarkNormalize(b, normalizeGo)
}
    47  
    48  func benchmarkNormalize(b *testing.B, normalize func(value []byte) []byte) {
    49  	rand.Seed(42)
    50  	strings := make([][]byte, b.N)
    51  	for i := 0; i < b.N; i++ {
    52  		strings[i] = randSeq(32)
    53  	}
    54  	b.ResetTimer()
    55  	for i := 0; i < b.N; i++ {
    56  		s := normalize(strings[i])
    57  		require.True(b, len(s) >= 8)
    58  	}
    59  }
    60  
// nfdTests holds the contents of NormalizationTest_v11.txt, embedded into the
// test binary; TestDecomposition scans it line by line.
//
//go:embed NormalizationTest_v11.txt
var nfdTests string
    63  
    64  func TestDecomposition(t *testing.T) {
    65  	r := require.New(t)
    66  
    67  	scanner := bufio.NewScanner(strings.NewReader(nfdTests))
    68  	for scanner.Scan() {
    69  		line := scanner.Text()
    70  		if len(line) <= 0 || line[0] == '@' || line[0] == '#' {
    71  			continue
    72  		}
    73  		splits := strings.Split(line, ";")
    74  		source := convertToBytes(splits[0])
    75  		targetNFD := convertToBytes(splits[2])
    76  		fixed := decompose(source)
    77  		r.True(bytes.Equal(targetNFD, fixed), "Failed on %s -> %s. Got %U, not %U", splits[0], splits[2], fixed, targetNFD)
    78  	}
    79  }
    80  
    81  func convertToBytes(s string) []byte {
    82  	splits := strings.Split(s, " ")
    83  	var b bytes.Buffer
    84  	for i := range splits {
    85  		value, _ := strconv.ParseUint(splits[i], 16, len(splits[i])*4)
    86  		b.WriteRune(rune(value))
    87  	}
    88  	return b.Bytes()
    89  }