github.com/primecitizens/pcz/std@v0.2.1/text/unicode/utf16/encode.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2010 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  // Package utf16 implements encoding and decoding of UTF-16 sequences.
     9  package utf16
    10  
    11  import (
    12  	. "github.com/primecitizens/pcz/std/text/unicode/common"
    13  )
    14  
    15  // EncodedSize returns the number of uint16s required to hold the UTF-16
    16  // encoding of unicode code points in s.
    17  func EncodedSize(s string) int {
    18  	n := len(s)
    19  	for _, v := range s {
    20  		if v >= surrSelf {
    21  			n++
    22  		}
    23  	}
    24  
    25  	return n
    26  }
    27  
    28  // EncodeRune returns the UTF-16 surrogate pair r1, r2 for the given rune.
    29  //
    30  // If the rune is not a valid Unicode code point or does not need encoding,
    31  // EncodeRune returns U+FFFD, U+FFFD.
    32  func EncodeRune(r rune) (r1, r2 rune) {
    33  	if r < surrSelf || r > MaxRune {
    34  		return RuneError, RuneError
    35  	}
    36  	r -= surrSelf
    37  	return surr1 + (r>>10)&0x3ff, surr2 + r&0x3ff
    38  }
    39  
    40  // AppendRunes appends the UTF-16 encoding of the Unicode
    41  // code point sequence src to dst.
    42  func AppendRunes(dst []uint16, src ...rune) []uint16 {
    43  	for n := 0; len(src) != 0; src = src[n:] {
    44  		if dst, n = EncodeRunes(dst, src...); n == 0 {
    45  			// TODO(alloc): grow dst
    46  			return dst
    47  		}
    48  	}
    49  
    50  	return dst
    51  }
    52  
    53  func EncodeRunes(dst []uint16, s ...rune) ([]uint16, int) {
    54  	var (
    55  		pos = len(dst)
    56  		i   int
    57  		v   rune
    58  	)
    59  	dst = dst[:cap(dst)]
    60  
    61  	for i = 0; i < len(s) && pos < len(dst); i++ {
    62  		switch v = s[i]; {
    63  		case 0 <= v && v < surr1, surr3 <= v && v < surrSelf:
    64  			// normal rune
    65  			dst[pos] = uint16(v)
    66  			pos++
    67  		case surrSelf <= v && v <= MaxRune:
    68  			// needs surrogate sequence
    69  			r1, r2 := EncodeRune(v)
    70  			dst[pos] = uint16(r1)
    71  			dst[pos+1] = uint16(r2)
    72  			pos += 2
    73  		default:
    74  			dst[pos] = uint16(RuneError)
    75  			pos++
    76  		}
    77  	}
    78  
    79  	return dst[:pos], i
    80  }