github.com/lbryio/lbcd@v0.22.119/claimtrie/normalization/normalizer_icu.go (about) 1 //go:build use_icu_normalization 2 // +build use_icu_normalization 3 4 package normalization 5 6 // #cgo CFLAGS: -O2 7 // #cgo LDFLAGS: -licuio -licui18n -licuuc -licudata 8 // #include <unicode/unorm2.h> 9 // #include <unicode/ustring.h> 10 // #include <unicode/uversion.h> 11 // int icu_version() { 12 // UVersionInfo info; 13 // u_getVersion(info); 14 // return ((int)(info[0]) << 16) + info[1]; 15 // } 16 // int normalize(char* name, int length, char* result) { 17 // UErrorCode ec = U_ZERO_ERROR; 18 // static const UNormalizer2* normalizer = NULL; 19 // if (normalizer == NULL) normalizer = unorm2_getNFDInstance(&ec); 20 // UChar dest[256]; // maximum claim name size is 255; we won't have more UTF16 chars than bytes 21 // int dest_len; 22 // u_strFromUTF8(dest, 256, &dest_len, name, length, &ec); 23 // if (U_FAILURE(ec) || dest_len == 0) return 0; 24 // UChar normalized[256]; 25 // dest_len = unorm2_normalize(normalizer, dest, dest_len, normalized, 256, &ec); 26 // if (U_FAILURE(ec) || dest_len == 0) return 0; 27 // dest_len = u_strFoldCase(dest, 256, normalized, dest_len, U_FOLD_CASE_DEFAULT, &ec); 28 // if (U_FAILURE(ec) || dest_len == 0) return 0; 29 // u_strToUTF8(result, 512, &dest_len, dest, dest_len, &ec); 30 // return dest_len; 31 // } 32 import "C" 33 import ( 34 "bytes" 35 "encoding/hex" 36 "fmt" 37 "unsafe" 38 ) 39 40 func init() { 41 Normalize = normalizeICU 42 NormalizeTitle = "Normalizing strings via ICU. ICU version = " + IcuVersion() 43 } 44 45 func IcuVersion() string { 46 // TODO: we probably need to explode if it's not 63.2 as it affects consensus 47 result := C.icu_version() 48 return fmt.Sprintf("%d.%d", result>>16, result&0xffff) 49 } 50 51 func normalizeICU(value []byte) []byte { 52 original := value 53 if len(value) <= 0 { 54 return value 55 } 56 57 other := normalizeGo(value) 58 59 name := (*C.char)(unsafe.Pointer(&value[0])) 60 length := C.int(len(value)) 61 62 // hopefully this is a stack alloc (but it may be a bit large for that): 63 var resultName [512]byte // inputs are restricted to 255 chars; it shouldn't expand too much past that 64 pointer := unsafe.Pointer(&resultName[0]) 65 66 resultLength := C.normalize(name, length, (*C.char)(pointer)) 67 if resultLength > 0 { 68 value = C.GoBytes(pointer, resultLength) 69 } 70 71 // return resultName[0:resultLength] -- we want to shrink the pointer (not use a slice on 1024) 72 if !bytes.Equal(other, value) { 73 fmt.Printf("Failed with %s, %s != %s,\n\t%s, %s != %s,\n", original, value, other, 74 hex.EncodeToString(original), hex.EncodeToString(value), hex.EncodeToString(other)) 75 } 76 return value 77 }