github.com/nuvolaris/goja@v0.0.0-20230825100449-967811910c6d/unistring/string.go (about)

     1  // Package unistring contains an implementation of a hybrid ASCII/UTF-16 string.
     2  // For ASCII strings the underlying representation is equivalent to a normal Go string.
     3  // For unicode strings the underlying representation is UTF-16 as []uint16 with 0th element set to 0xFEFF.
// unistring.String allows representing malformed UTF-16 values (e.g. stand-alone parts of surrogate pairs)
// which cannot be represented in UTF-8.
// At the same time it is possible to use unistring.String values as property keys just as efficiently as simple
// strings (the leading 0xFEFF ensures there is no clash with ASCII strings), and it is possible to convert them
// to valueString without extra allocations.
     9  package unistring
    10  
    11  import (
    12  	"reflect"
    13  	"unicode/utf16"
    14  	"unicode/utf8"
    15  	"unsafe"
    16  )
    17  
const (
	// BOM is the UTF-16 byte order mark (0xFEFF). It is stored as the 0th element of the
	// UTF-16 representation, which is how a unicode String is told apart from an ASCII one.
	BOM = 0xFEFF
)
    21  
// String is a hybrid ASCII/UTF-16 string. It holds either plain ASCII text, or the raw bytes
// of a []uint16 whose 0th element is BOM followed by UTF-16 code units.
type String string
    23  
    24  // Scan checks if the string contains any unicode characters. If it does, converts to an array suitable for creating
    25  // a String using FromUtf16, otherwise returns nil.
    26  func Scan(s string) []uint16 {
    27  	utf16Size := 0
    28  	for ; utf16Size < len(s); utf16Size++ {
    29  		if s[utf16Size] >= utf8.RuneSelf {
    30  			goto unicode
    31  		}
    32  	}
    33  	return nil
    34  unicode:
    35  	for _, chr := range s[utf16Size:] {
    36  		utf16Size++
    37  		if chr > 0xFFFF {
    38  			utf16Size++
    39  		}
    40  	}
    41  
    42  	buf := make([]uint16, utf16Size+1)
    43  	buf[0] = BOM
    44  	c := 1
    45  	for _, chr := range s {
    46  		if chr <= 0xFFFF {
    47  			buf[c] = uint16(chr)
    48  		} else {
    49  			first, second := utf16.EncodeRune(chr)
    50  			buf[c] = uint16(first)
    51  			c++
    52  			buf[c] = uint16(second)
    53  		}
    54  		c++
    55  	}
    56  
    57  	return buf
    58  }
    59  
    60  func NewFromString(s string) String {
    61  	if buf := Scan(s); buf != nil {
    62  		return FromUtf16(buf)
    63  	}
    64  	return String(s)
    65  }
    66  
    67  func NewFromRunes(s []rune) String {
    68  	ascii := true
    69  	size := 0
    70  	for _, c := range s {
    71  		if c >= utf8.RuneSelf {
    72  			ascii = false
    73  			if c > 0xFFFF {
    74  				size++
    75  			}
    76  		}
    77  		size++
    78  	}
    79  	if ascii {
    80  		return String(s)
    81  	}
    82  	b := make([]uint16, size+1)
    83  	b[0] = BOM
    84  	i := 1
    85  	for _, c := range s {
    86  		if c <= 0xFFFF {
    87  			b[i] = uint16(c)
    88  		} else {
    89  			first, second := utf16.EncodeRune(c)
    90  			b[i] = uint16(first)
    91  			i++
    92  			b[i] = uint16(second)
    93  		}
    94  		i++
    95  	}
    96  	return FromUtf16(b)
    97  }
    98  
    99  func FromUtf16(b []uint16) String {
   100  	var str string
   101  	hdr := (*reflect.StringHeader)(unsafe.Pointer(&str))
   102  	hdr.Data = uintptr(unsafe.Pointer(&b[0]))
   103  	hdr.Len = len(b) * 2
   104  
   105  	return String(str)
   106  }
   107  
   108  func (s String) String() string {
   109  	if b := s.AsUtf16(); b != nil {
   110  		return string(utf16.Decode(b[1:]))
   111  	}
   112  
   113  	return string(s)
   114  }
   115  
   116  func (s String) AsUtf16() []uint16 {
   117  	if len(s) < 4 || len(s)&1 != 0 {
   118  		return nil
   119  	}
   120  
   121  	var a []uint16
   122  	raw := string(s)
   123  
   124  	sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&a))
   125  	sliceHeader.Data = (*reflect.StringHeader)(unsafe.Pointer(&raw)).Data
   126  
   127  	l := len(raw) / 2
   128  
   129  	sliceHeader.Len = l
   130  	sliceHeader.Cap = l
   131  
   132  	if a[0] == BOM {
   133  		return a
   134  	}
   135  
   136  	return nil
   137  }