github.com/iDigitalFlame/xmt@v0.5.4/device/winapi/utf16.go (about)

     1  // Copyright (C) 2020 - 2023 iDigitalFlame
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU General Public License as published by
     5  // the Free Software Foundation, either version 3 of the License, or
     6  // any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU General Public License
    14  // along with this program.  If not, see <https://www.gnu.org/licenses/>.
    15  //
    16  
     17  // Package winapi is a Windows-specific package that assists with handling or
    18  // accessing many advanced WinAPI functions. This package also contains various
    19  // helper functions to assist with complex operations.
    20  package winapi
    21  
    22  import (
    23  	"syscall"
    24  	"unsafe"
    25  )
    26  
// UTF-16 boundary values shared by the encode and decode helpers in this file.
const (
	utfSelf        = 0x10000        // first code point that is encoded as a surrogate pair
	utfSurgA       = 0xd800         // start of the high (first-of-pair) surrogate range
	utfSurgB       = 0xdc00         // start of the low (second-of-pair) surrogate range
	utfSurgC       = 0xe000         // first value past the surrogate range
	utfRuneMax     = rune(0x10FFFF) // '\U0010FFFF', the largest rune that is encoded as a pair
	utfReplacement = rune(0xFFFD)   // '\uFFFD', emitted in place of anything invalid
)
    35  
// SliceHeader is the runtime representation of a slice.
//
// It cannot be used safely or portably and its representation may change in a
// later release.
//
// That warning is knowingly disregarded here: UTF16PtrToString overlays this
// struct on a []uint16 variable in order to view raw, NUL-terminated memory as
// a slice without copying it.
type SliceHeader struct {
	Data unsafe.Pointer // address of the first element
	Len  int            // number of elements visible through the slice
	Cap  int            // number of elements available from Data
}
    46  
    47  // FnvHash returns the fnv32 hash of the supplied string value.
    48  func FnvHash(n string) uint32 {
    49  	h := uint32(2166136261)
    50  	for i := range n {
    51  		h *= 16777619
    52  		h ^= uint32(n[i])
    53  	}
    54  	return h
    55  }
    56  
    57  // UTF16Decode returns the Unicode code point sequence represented by the UTF-16
    58  // encoding rune values supplied.
    59  func UTF16Decode(s []uint16) []rune {
    60  	var (
    61  		b = make([]rune, len(s))
    62  		n int
    63  	)
    64  loop:
    65  	for i := 0; i < len(s); i++ {
    66  		switch r := s[i]; {
    67  		case r == 0:
    68  			break loop
    69  		case r < utfSurgA, utfSurgC <= r:
    70  			b[n] = rune(r)
    71  		case utfSurgA <= r && r < utfSurgB && i+1 < len(s) && utfSurgB <= s[i+1] && s[i+1] < utfSurgC:
    72  			b[n] = utf16DecodeRune(rune(r), rune(s[i+1]))
    73  			i++
    74  		default:
    75  			b[n] = utfReplacement
    76  		}
    77  		n++
    78  	}
    79  	return b[:n]
    80  }
    81  
    82  // UTF16ToString returns the UTF-8 encoding of the UTF-16 sequence s, with a
    83  // terminating NUL and any bytes after the NUL removed.
    84  func UTF16ToString(s []uint16) string {
    85  	return string(UTF16Decode(s))
    86  }
    87  func utf16DecodeRune(r1, r2 rune) rune {
    88  	if utfSurgA <= r1 && r1 < utfSurgB && utfSurgB <= r2 && r2 < utfSurgC {
    89  		return (r1-utfSurgA)<<10 | (r2 - utfSurgB) + utfSelf
    90  	}
    91  	return utfReplacement
    92  }
    93  
    94  // UTF16EncodeStd encodes the runes into a UTF16 array and ignores zero points.
    95  //
    96  // This is ONLY safe to use if you know what you're doing.
    97  func UTF16EncodeStd(s []rune) []uint16 {
    98  	n := len(s)
    99  	for i := range s {
   100  		if s[i] < utfSelf {
   101  			continue
   102  		}
   103  		n++
   104  	}
   105  	var (
   106  		b = make([]uint16, n)
   107  		i int
   108  	)
   109  	for n = 0; n < len(s); i++ {
   110  		switch {
   111  		case 0 <= s[i] && s[i] < utfSurgA, utfSurgC <= s[i] && s[i] < utfSelf:
   112  			b[n] = uint16(s[i])
   113  			n++
   114  		case utfSelf <= s[i] && s[i] <= utfRuneMax:
   115  			b[n], b[n+1] = utf16EncodeRune(s[i])
   116  			n += 2
   117  		default:
   118  			b[n] = uint16(utfReplacement)
   119  			n++
   120  		}
   121  	}
   122  	return b[:n]
   123  }
   124  
   125  // UTF16PtrToString takes a pointer to a UTF-16 sequence and returns the
   126  // corresponding UTF-8 encoded string.
   127  //
   128  // If the pointer is nil, it returns the empty string. It assumes that the UTF-16
   129  // sequence is terminated at a zero word; if the zero word is not present, the
   130  // program may crash.
   131  func UTF16PtrToString(p *uint16) string {
   132  	if p == nil || *p == 0 {
   133  		return ""
   134  	}
   135  	n := 0
   136  	for v := unsafe.Pointer(p); *(*uint16)(v) != 0; n++ {
   137  		v = unsafe.Pointer(uintptr(v) + unsafe.Sizeof(*p))
   138  	}
   139  	var s []uint16
   140  	h := (*SliceHeader)(unsafe.Pointer(&s))
   141  	h.Data, h.Len, h.Cap = unsafe.Pointer(p), n, n
   142  	return string(UTF16Decode(s))
   143  }
   144  func utf16Encode(s []rune) ([]uint16, error) {
   145  	n := len(s)
   146  	for i := range s {
   147  		if s[i] == 0 && i+1 < len(s) {
   148  			return nil, syscall.EINVAL
   149  		}
   150  		if s[i] < utfSelf {
   151  			continue
   152  		}
   153  		n++
   154  	}
   155  	var (
   156  		b = make([]uint16, n)
   157  		i int
   158  	)
   159  	for n = 0; n < len(s); i++ {
   160  		switch {
   161  		case s[i] == 0 && i+1 < len(s):
   162  			return nil, syscall.EINVAL
   163  		case 0 <= s[i] && s[i] < utfSurgA, utfSurgC <= s[i] && s[i] < utfSelf:
   164  			b[n] = uint16(s[i])
   165  			n++
   166  		case utfSelf <= s[i] && s[i] <= utfRuneMax:
   167  			b[n], b[n+1] = utf16EncodeRune(s[i])
   168  			n += 2
   169  		default:
   170  			b[n] = uint16(utfReplacement)
   171  			n++
   172  		}
   173  	}
   174  	return b[:n], nil
   175  }
   176  func utf16EncodeRune(r rune) (uint16, uint16) {
   177  	if r < utfSelf || r > utfRuneMax {
   178  		return uint16(utfReplacement), uint16(utfReplacement)
   179  	}
   180  	r -= utfSelf
   181  	return uint16(utfSurgA + (r>>10)&0x3FF), uint16(utfSurgB + r&0x3FF)
   182  }
   183  
   184  // UTF16FromString returns the UTF-16 encoding of the UTF-8 string with a
   185  // terminating NUL added.
   186  //
   187  // If the string contains a NUL byte at any location, it returns syscall.EINVAL.
   188  func UTF16FromString(s string) ([]uint16, error) {
   189  	if len(s) == 0 {
   190  		return []uint16{0}, nil
   191  	}
   192  	return utf16Encode([]rune(s + "\x00"))
   193  }
   194  
   195  // UTF16PtrFromString returns pointer to the UTF-16 encoding of the UTF-8 string,
   196  // with a terminating NUL added.
   197  //
   198  // If the string contains a NUL byte at any location, it returns syscall.EINVAL.
   199  func UTF16PtrFromString(s string) (*uint16, error) {
   200  	a, err := UTF16FromString(s)
   201  	if err != nil {
   202  		return nil, err
   203  	}
   204  	return &a[0], nil
   205  }