github.com/primecitizens/pcz/std@v0.2.1/text/unicode/wtf16/decode.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2023 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  package wtf16
     9  
    10  import (
    11  	. "github.com/primecitizens/pcz/std/text/unicode/common"
    12  	"github.com/primecitizens/pcz/std/text/unicode/utf16"
    13  	"github.com/primecitizens/pcz/std/text/unicode/utf8"
    14  )
    15  
    16  func WTF8DecodedSize(s ...uint16) (n int, canDecodeInPlace bool) {
    17  	// use utf16.UTF8DecodedSize is fine because WTF8Decode handles
    18  	// invalid surrogates as 3-byte utf8 append, which is the same
    19  	// as utf8.RuneErrorLen
    20  	return utf16.UTF8DecodedSize(s...)
    21  }
    22  
    23  func WTF8DecodeAll(dst []byte, src ...uint16) []byte {
    24  	for n := 0; len(src) != 0; src = src[n:] {
    25  		if dst, n = WTF8Decode(dst, src...); n == 0 {
    26  			// TODO(alloc): grow dst
    27  			return dst
    28  		}
    29  	}
    30  
    31  	return dst
    32  }
    33  
    34  // WTF8Decode returns the WTF-8 encoding of the potentially ill-formed
    35  // UTF-16 src.
    36  func WTF8Decode(dst []byte, src ...uint16) ([]byte, int) {
    37  	var (
    38  		r     rune
    39  		sz, n int
    40  	)
    41  
    42  Loop:
    43  	for ; n < len(src); n++ {
    44  		switch r = rune(src[n]); {
    45  		case r < surr1, surr3 <= r:
    46  			// normal rune
    47  			dst, sz = utf8.EncodeRune(dst, r)
    48  		case surr1 <= r && r < surr2 &&
    49  			n+1 < len(src) &&
    50  			surr2 <= src[n+1] && src[n+1] < surr3:
    51  			// valid surrogate sequence
    52  			n++
    53  			dst, sz = utf8.EncodeRune(dst, utf16.DecodeRune(r, rune(src[n])))
    54  		default:
    55  			// WTF-8 fallback.
    56  			// This only handles the 3-byte case of utf8.AppendRune,
    57  			// as surrogates always fall in that case.
    58  
    59  			if cap(dst)-len(dst) < 3 {
    60  				break Loop
    61  			}
    62  
    63  			if r > MaxRune {
    64  				r = RuneError
    65  			}
    66  
    67  			dst = append(dst, t3|byte(r>>12), tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
    68  		}
    69  
    70  		if sz == 0 {
    71  			break
    72  		}
    73  	}
    74  
    75  	return dst, n
    76  }