github.com/primecitizens/pcz/std@v0.2.1/text/unicode/utf16/utf16.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2010 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  package utf16
     9  
    10  import (
    11  	"unsafe"
    12  
    13  	"github.com/primecitizens/pcz/std/core/arch"
    14  	"github.com/primecitizens/pcz/std/core/bits"
    15  	. "github.com/primecitizens/pcz/std/text/unicode/common"
    16  )
    17  
    18  func AsString(s []uint16) String {
    19  	if arch.BigEndian {
    20  		for i, x := range s {
    21  			s[i] = bits.ReverseBytes16(x)
    22  		}
    23  	}
    24  
    25  	return String(
    26  		unsafe.String(
    27  			(*byte)(unsafe.Pointer(unsafe.SliceData(s))), len(s)*2,
    28  		),
    29  	)
    30  }
    31  
    32  // A String represents a UTF-16 encoded string.
    33  //
    34  // NOTE: DO NOT use as go string.
    35  type String string
    36  
    37  func (s String) Slice() []uint16 {
    38  	ret := unsafe.Slice(
    39  		(*uint16)(unsafe.Pointer(unsafe.StringData(string(s)))), len(s)/2,
    40  	)
    41  
    42  	if arch.BigEndian {
    43  		for i, x := range ret {
    44  			ret[i] = bits.ReverseBytes16(x)
    45  		}
    46  	}
    47  
    48  	return ret
    49  }
    50  
    51  const (
    52  	// 0xd800-0xdc00 encodes the high 10 bits of a pair.
    53  	// 0xdc00-0xe000 encodes the low 10 bits of a pair.
    54  	// the value is those 20 bits plus 0x10000.
    55  	surr1 = SurrogateMin
    56  	surr2 = 0xdc00
    57  	surr3 = SurrogateMax + 1
    58  
    59  	surrSelf = 0x10000
    60  )
    61  
    62  // IsSurrogate reports whether the specified Unicode code point
    63  // can appear in a surrogate pair.
    64  func IsSurrogate(r rune) bool {
    65  	return surr1 <= r && r < surr3
    66  }