github.com/primecitizens/pcz/std@v0.2.1/text/unicode/wtf16/encode.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2023 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  package wtf16
     9  
    10  import (
    11  	. "github.com/primecitizens/pcz/std/text/unicode/common"
    12  	"github.com/primecitizens/pcz/std/text/unicode/utf16"
    13  	"github.com/primecitizens/pcz/std/text/unicode/utf8"
    14  )
    15  
    16  // Encode returns the potentially ill-formed UTF-16 encoding of s.
    17  func Encode(dst []uint16, s string) []uint16 {
    18  	for i := 0; i < len(s); {
    19  		// Cannot use 'for range s' because it expects valid
    20  		// UTF-8 runes.
    21  		r, size := utf8.First(s[i:])
    22  		if r == RuneError {
    23  			// Check if s[i:] contains a valid WTF-8 encoded surrogate.
    24  			if sc := s[i:]; len(sc) >= 3 &&
    25  				sc[0] == 0xED &&
    26  				0xA0 <= sc[1] && sc[1] <= 0xBF &&
    27  				0x80 <= sc[2] && sc[2] <= 0xBF {
    28  
    29  				r = rune(sc[0]&mask3)<<12 + rune(sc[1]&maskx)<<6 + rune(sc[2]&maskx)
    30  				dst = append(dst, uint16(r))
    31  				i += 3
    32  				continue
    33  			}
    34  		}
    35  		i += size
    36  		dst = utf16.AppendRunes(dst, r)
    37  	}
    38  	return dst
    39  }