github.com/aws/aws-cryptographic-material-providers-library/releases/go/smithy-dafny-standard-library@v0.2.0/UTF8/externs.go (about)

     1  package UTF8
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"unicode/utf16"
     7  	"unicode/utf8"
     8  
     9  	"github.com/aws/aws-cryptographic-material-providers-library/releases/go/smithy-dafny-standard-library/Wrappers"
    10  	"github.com/dafny-lang/DafnyRuntimeGo/v4/dafny"
    11  )
    12  
    13  // The following constants are copied from the Go utf16 lib and are used
    14  // to check the validity of the utf16 surrogate pairs.
    15  const (
    16  	// 0xd800-0xdc00 encodes the high 10 bits of a pair.
    17  	// 0xdc00-0xe000 encodes the low 10 bits of a pair.
    18  	surr1 = 0xd800
    19  	surr2 = 0xdc00
    20  	surr3 = 0xe000
    21  )
    22  
    23  //IMP: The below extern implementations are only compatible
    24  //with unicode-char:false transpiled code.
    25  
    26  // Decode decodes utf8 encoded Go rune to dafny encoded utf16 char sequence
    27  // Anything we receive here is supposed to be utf8 encoded Go rune.
    28  // And since this extern is for unicode-char:false,
    29  // we need to encode the result in compatible dafny utf16 string before returning
    30  // the result.
    31  func Decode(utf8EncodedDafnySeq dafny.Sequence) Wrappers.Result {
    32  	res, err := DecodeFromNativeGoByteArray(dafny.ToByteArray(utf8EncodedDafnySeq))
    33  	if err != nil {
    34  		return Wrappers.Companion_Result_.Create_Failure_(dafny.SeqOfString(err.Error()))
    35  	}
    36  
    37  	return Wrappers.Companion_Result_.Create_Success_(res)
    38  }
    39  
    40  // Encode encodes utf16 encoded dafny char (rune) to utf-8 Go rune sequence.
    41  // Anything we receive here is supposed to be utf16 encoded Go rune
    42  // since this extern is for unicode-char:false.
    43  func Encode(utf16EncodedDafnySeq dafny.Sequence) Wrappers.Result {
    44  	utf8EncodedBytes, err := decodeUtf16(utf16EncodedDafnySeq)
    45  	if err != nil {
    46  		return Wrappers.Companion_Result_.Create_Failure_(dafny.SeqOfString(err.Error()))
    47  	}
    48  	return Wrappers.Companion_Result_.Create_Success_(dafny.SeqOfBytes(utf8EncodedBytes))
    49  }
    50  
    51  // This method is to be called from the Type Conversion layer.
    52  // We reuse the same method so that all conversions are consistent.
    53  func DecodeFromNativeGoByteArray(utf8EncodedByteArray []byte) (dafny.Sequence, error) {
    54  	if !utf8.Valid(utf8EncodedByteArray) {
    55  		return nil, fmt.Errorf("invalid utf8 encoded sequence: %v", utf8EncodedByteArray)
    56  	}
    57  	utf16Encoded := utf16.Encode([]rune(string(utf8EncodedByteArray)))
    58  	var dafnyCharArray []dafny.Char
    59  	for _, c := range utf16Encoded {
    60  		dafnyCharArray = append(dafnyCharArray, dafny.Char(c))
    61  	}
    62  	return dafny.SeqOfChars(dafnyCharArray...), nil
    63  }
    64  
    65  // decode appends to buf the Unicode code point sequence represented
    66  // by the UTF-16 encoding seq, then encode the code point as utf8 and return the utf8 buffer
    67  func decodeUtf16(seq dafny.Sequence) ([]byte, error) {
    68  	utf8EncodedBytes := []byte{}
    69  
    70  	for i := dafny.Iterate(seq); ; {
    71  		firstVal, firstValExists := i()
    72  		if !firstValExists {
    73  			// Iterator has finished, return the buffer
    74  			return utf8EncodedBytes, nil
    75  		} else {
    76  			var ar rune
    77  
    78  			// We should be able to rely on dafny that anything inside the seq is utf16 encoded
    79  			// with unicode-char: false. But given the Long Psi issue, it's better to be safe.
    80  			// First check if it's a dafny.Char type, then check if it's within the limits of uint16.
    81  			firstChar, firstValIsAChar := firstVal.(dafny.Char)
    82  			if !firstValIsAChar || firstChar > math.MaxUint16 || firstChar < 0 {
    83  				return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq)
    84  			}
    85  
    86  			// Downcast to uint16
    87  			switch r1 := uint16(firstChar); {
    88  
    89  			case r1 < surr1, surr3 <= r1:
    90  				// normal rune
    91  				ar = rune(r1)
    92  
    93  			case utf16.IsSurrogate(rune(r1)):
    94  				// If firstVal is surrogate, then we need the secondVal to construct the pair
    95  				secondVal, ok := i()
    96  
    97  				// Same sanity check as line 84
    98  				secondChar, secondValIsAChar := secondVal.(dafny.Char)
    99  				if !ok || !secondValIsAChar || secondChar > math.MaxUint16 || secondChar < 0 {
   100  					return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq)
   101  				}
   102  
   103  				// Check if the secondVal is within the valid low surrogate range
   104  				if surr2 <= uint16(secondChar) && uint16(secondChar) < surr3 {
   105  					// valid surrogate sequence
   106  					ar = utf16.DecodeRune(rune(r1), rune(uint16(secondChar)))
   107  				} else {
   108  					return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq)
   109  				}
   110  			default:
   111  				return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq)
   112  			}
   113  
   114  			// Create the buffer (upto 4 bytes) to hold the utf8 rune
   115  			buf := make([]byte, utf8.RuneLen(ar))
   116  			n := utf8.EncodeRune(buf, ar)
   117  
   118  			// Append to the result
   119  			utf8EncodedBytes = append(utf8EncodedBytes, buf[:n]...)
   120  		}
   121  	}
   122  }