github.com/aws/aws-cryptographic-material-providers-library/releases/go/smithy-dafny-standard-library@v0.2.0/UTF8/externs.go (about) 1 package UTF8 2 3 import ( 4 "fmt" 5 "math" 6 "unicode/utf16" 7 "unicode/utf8" 8 9 "github.com/aws/aws-cryptographic-material-providers-library/releases/go/smithy-dafny-standard-library/Wrappers" 10 "github.com/dafny-lang/DafnyRuntimeGo/v4/dafny" 11 ) 12 13 // The following constants are copied from the Go utf16 lib and are used 14 // to check the validity of the utf16 surrogate pairs. 15 const ( 16 // 0xd800-0xdc00 encodes the high 10 bits of a pair. 17 // 0xdc00-0xe000 encodes the low 10 bits of a pair. 18 surr1 = 0xd800 19 surr2 = 0xdc00 20 surr3 = 0xe000 21 ) 22 23 //IMP: The below extern implementations are only compatible 24 //with unicode-char:false transpiled code. 25 26 // Decode decodes utf8 encoded Go rune to dafny encoded utf16 char sequence 27 // Anything we receive here is supposed to be utf8 encoded Go rune. 28 // And since this extern is for unicode-char:false, 29 // we need to encode the result in compatible dafny utf16 string before returning 30 // the result. 31 func Decode(utf8EncodedDafnySeq dafny.Sequence) Wrappers.Result { 32 res, err := DecodeFromNativeGoByteArray(dafny.ToByteArray(utf8EncodedDafnySeq)) 33 if err != nil { 34 return Wrappers.Companion_Result_.Create_Failure_(dafny.SeqOfString(err.Error())) 35 } 36 37 return Wrappers.Companion_Result_.Create_Success_(res) 38 } 39 40 // Encode encodes utf16 encoded dafny char (rune) to utf-8 Go rune sequence. 41 // Anything we receive here is supposed to be utf16 encoded Go rune 42 // since this extern is for unicode-char:false. 43 func Encode(utf16EncodedDafnySeq dafny.Sequence) Wrappers.Result { 44 utf8EncodedBytes, err := decodeUtf16(utf16EncodedDafnySeq) 45 if err != nil { 46 return Wrappers.Companion_Result_.Create_Failure_(dafny.SeqOfString(err.Error())) 47 } 48 return Wrappers.Companion_Result_.Create_Success_(dafny.SeqOfBytes(utf8EncodedBytes)) 49 } 50 51 // This method is to be called from the Type Conversion layer. 52 // We reuse the same method so that all conversions are consistent. 53 func DecodeFromNativeGoByteArray(utf8EncodedByteArray []byte) (dafny.Sequence, error) { 54 if !utf8.Valid(utf8EncodedByteArray) { 55 return nil, fmt.Errorf("invalid utf8 encoded sequence: %v", utf8EncodedByteArray) 56 } 57 utf16Encoded := utf16.Encode([]rune(string(utf8EncodedByteArray))) 58 var dafnyCharArray []dafny.Char 59 for _, c := range utf16Encoded { 60 dafnyCharArray = append(dafnyCharArray, dafny.Char(c)) 61 } 62 return dafny.SeqOfChars(dafnyCharArray...), nil 63 } 64 65 // decode appends to buf the Unicode code point sequence represented 66 // by the UTF-16 encoding seq, then encode the code point as utf8 and return the utf8 buffer 67 func decodeUtf16(seq dafny.Sequence) ([]byte, error) { 68 utf8EncodedBytes := []byte{} 69 70 for i := dafny.Iterate(seq); ; { 71 firstVal, firstValExists := i() 72 if !firstValExists { 73 // Iterator has finished, return the buffer 74 return utf8EncodedBytes, nil 75 } else { 76 var ar rune 77 78 // We should be able to rely on dafny that anything inside the seq is utf16 encoded 79 // with unicode-char: false. But given the Long Psi issue, it's better to be safe. 80 // First check if it's a dafny.Char type, then check if it's within the limits of uint16. 81 firstChar, firstValIsAChar := firstVal.(dafny.Char) 82 if !firstValIsAChar || firstChar > math.MaxUint16 || firstChar < 0 { 83 return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq) 84 } 85 86 // Downcast to uint16 87 switch r1 := uint16(firstChar); { 88 89 case r1 < surr1, surr3 <= r1: 90 // normal rune 91 ar = rune(r1) 92 93 case utf16.IsSurrogate(rune(r1)): 94 // If firstVal is surrogate, then we need the secondVal to construct the pair 95 secondVal, ok := i() 96 97 // Same sanity check as line 84 98 secondChar, secondValIsAChar := secondVal.(dafny.Char) 99 if !ok || !secondValIsAChar || secondChar > math.MaxUint16 || secondChar < 0 { 100 return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq) 101 } 102 103 // Check if the secondVal is within the valid low surrogate range 104 if surr2 <= uint16(secondChar) && uint16(secondChar) < surr3 { 105 // valid surrogate sequence 106 ar = utf16.DecodeRune(rune(r1), rune(uint16(secondChar))) 107 } else { 108 return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq) 109 } 110 default: 111 return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq) 112 } 113 114 // Create the buffer (upto 4 bytes) to hold the utf8 rune 115 buf := make([]byte, utf8.RuneLen(ar)) 116 n := utf8.EncodeRune(buf, ar) 117 118 // Append to the result 119 utf8EncodedBytes = append(utf8EncodedBytes, buf[:n]...) 120 } 121 } 122 }