github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/utf/utf.go (about)

     1  // Copyright (c) 2015-2021, NVIDIA CORPORATION.
     2  // SPDX-License-Identifier: Apache-2.0
     3  
     4  // Package utf provides utilities for working with UTF strings, notably including UTF16.
     5  package utf
     6  
     7  import "bytes"
     8  import "encoding/binary"
     9  import "fmt"
    10  import "unicode/utf16"
    11  import "unicode/utf8"
    12  
    13  var LittleEndian binary.ByteOrder = binary.LittleEndian
    14  var BigEndian binary.ByteOrder = binary.BigEndian
    15  
    16  func UTF16ByteSliceToString(u8Buf []byte, byteOrder binary.ByteOrder) (utf8String string, err error) {
    17  	// Set default return values
    18  
    19  	utf8String = ""
    20  	err = nil
    21  
    22  	// Ensure []byte can be interpretted as []uint16
    23  
    24  	if 0 != (len(u8Buf) % 2) {
    25  		err = fmt.Errorf("UTF-16-LE requires []byte with even number of bytes")
    26  		return
    27  	}
    28  
    29  	// Convert u8Buf ([]byte) to u16Buf ([]uint16)
    30  
    31  	numUint16s := len(u8Buf) / 2
    32  	u16Reader := bytes.NewReader(u8Buf)
    33  	u16Buf := make([]uint16, numUint16s)
    34  
    35  	err = binary.Read(u16Reader, byteOrder, &u16Buf)
    36  	if nil != err {
    37  		return
    38  	}
    39  
    40  	// Convert u16Buf ([]uint16) to runeForm ([]rune)
    41  
    42  	runeFormSlice := utf16.Decode(u16Buf)
    43  
    44  	// Encode runeFormSlice elements into bytes.Buffer
    45  
    46  	var runeByteBuffer bytes.Buffer
    47  
    48  	for _, runeFormElement := range runeFormSlice {
    49  		_, _ = runeByteBuffer.WriteRune(runeFormElement)
    50  	}
    51  
    52  	// Return resultant string from runeByteBuffer
    53  
    54  	utf8String = runeByteBuffer.String()
    55  
    56  	return
    57  }
    58  
    59  func StringToUTF16ByteSlice(utf8String string, byteOrder binary.ByteOrder) (u8Buf []byte) {
    60  	runeArray := []rune(utf8String)
    61  	u16Slice := utf16.Encode(runeArray)
    62  
    63  	u8Buf = make([]byte, (2 * len(u16Slice)))
    64  
    65  	for i := 0; i < len(u16Slice); i++ {
    66  		if binary.LittleEndian == byteOrder {
    67  			u8Buf[(2*i)+0] = byte((u16Slice[i] >> 0) & 0xFF)
    68  			u8Buf[(2*i)+1] = byte((u16Slice[i] >> 8) & 0xFF)
    69  		} else { // binary.BigEndian == byteOrder
    70  			u8Buf[(2*i)+1] = byte((u16Slice[i] >> 0) & 0xFF)
    71  			u8Buf[(2*i)+0] = byte((u16Slice[i] >> 8) & 0xFF)
    72  		}
    73  	}
    74  
    75  	return
    76  }
    77  
    78  func UTF8ByteSliceToString(u8Buf []byte) (utf8String string, err error) {
    79  	if !utf8.Valid(u8Buf) {
    80  		utf8String = ""
    81  		err = fmt.Errorf("Not valid UTF-8")
    82  		return
    83  	}
    84  
    85  	utf8String = string(u8Buf)
    86  	err = nil
    87  
    88  	return
    89  }
    90  
    91  func StringToUTF8ByteSlice(utf8String string) (u8Buf []byte) {
    92  	u8Buf = []byte(utf8String)
    93  
    94  	return
    95  }