github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/utf/utf.go (about) 1 // Copyright (c) 2015-2021, NVIDIA CORPORATION. 2 // SPDX-License-Identifier: Apache-2.0 3 4 // Package utf provides utilities for working with UTF strings, notably including UTF16. 5 package utf 6 7 import "bytes" 8 import "encoding/binary" 9 import "fmt" 10 import "unicode/utf16" 11 import "unicode/utf8" 12 13 var LittleEndian binary.ByteOrder = binary.LittleEndian 14 var BigEndian binary.ByteOrder = binary.BigEndian 15 16 func UTF16ByteSliceToString(u8Buf []byte, byteOrder binary.ByteOrder) (utf8String string, err error) { 17 // Set default return values 18 19 utf8String = "" 20 err = nil 21 22 // Ensure []byte can be interpretted as []uint16 23 24 if 0 != (len(u8Buf) % 2) { 25 err = fmt.Errorf("UTF-16-LE requires []byte with even number of bytes") 26 return 27 } 28 29 // Convert u8Buf ([]byte) to u16Buf ([]uint16) 30 31 numUint16s := len(u8Buf) / 2 32 u16Reader := bytes.NewReader(u8Buf) 33 u16Buf := make([]uint16, numUint16s) 34 35 err = binary.Read(u16Reader, byteOrder, &u16Buf) 36 if nil != err { 37 return 38 } 39 40 // Convert u16Buf ([]uint16) to runeForm ([]rune) 41 42 runeFormSlice := utf16.Decode(u16Buf) 43 44 // Encode runeFormSlice elements into bytes.Buffer 45 46 var runeByteBuffer bytes.Buffer 47 48 for _, runeFormElement := range runeFormSlice { 49 _, _ = runeByteBuffer.WriteRune(runeFormElement) 50 } 51 52 // Return resultant string from runeByteBuffer 53 54 utf8String = runeByteBuffer.String() 55 56 return 57 } 58 59 func StringToUTF16ByteSlice(utf8String string, byteOrder binary.ByteOrder) (u8Buf []byte) { 60 runeArray := []rune(utf8String) 61 u16Slice := utf16.Encode(runeArray) 62 63 u8Buf = make([]byte, (2 * len(u16Slice))) 64 65 for i := 0; i < len(u16Slice); i++ { 66 if binary.LittleEndian == byteOrder { 67 u8Buf[(2*i)+0] = byte((u16Slice[i] >> 0) & 0xFF) 68 u8Buf[(2*i)+1] = byte((u16Slice[i] >> 8) & 0xFF) 69 } else { // binary.BigEndian == byteOrder 70 u8Buf[(2*i)+1] = byte((u16Slice[i] >> 0) & 0xFF) 71 u8Buf[(2*i)+0] = byte((u16Slice[i] >> 8) & 0xFF) 72 } 73 } 74 75 return 76 } 77 78 func UTF8ByteSliceToString(u8Buf []byte) (utf8String string, err error) { 79 if !utf8.Valid(u8Buf) { 80 utf8String = "" 81 err = fmt.Errorf("Not valid UTF-8") 82 return 83 } 84 85 utf8String = string(u8Buf) 86 err = nil 87 88 return 89 } 90 91 func StringToUTF8ByteSlice(utf8String string) (u8Buf []byte) { 92 u8Buf = []byte(utf8String) 93 94 return 95 }